Flow Cytometry

Author

Susan Holmes, Laura Symul

Published

December 24, 2025

Code
library(tidyverse)
library(magrittr)
library(gt)
library(patchwork)
library(SummarizedExperiment)
library(tidySummarizedExperiment)
library(flowCore)
library(flowWorkspace)
library(CytoML)
library(ggcyto)

# Source every entry of the two project script folders. The values returned
# by source() are not needed, so they are discarded instead of being stored.
invisible(fs::dir_map("R scripts/", source))
invisible(fs::dir_map("R scripts Flow/", source))
Code
# Make the light theme the default for all plots produced in this document
theme_set(theme_light())
Code
# Input folder with the raw flow cytometry files.
# get_data_dir() and get_01_output_dir() are project helpers defined outside
# this chunk.
flow_dir <- get_data_dir() |> str_c("06 Flow cytometry/")

# Folder receiving the intermediate files written by this document
output_dir <- get_01_output_dir() |> str_c("06 Flow cytometry tmp files/")

# Create the output folder (and any missing parent) on first use
if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE)

This document describes the processing of the flow cytometry data from both MGH and CAP sites. The processing consists of the following steps:

  1. Creating the file inventory: This step involves parsing the directory structure to identify relevant WSP and FCS files for analysis.
  2. Verifying file data: This step checks the consistency between the expected samples (based on sample lists) and the actual files found in the inventory.
  3. Extracting gating structure: This step involves applying the gating strategy defined in the WSP files to the corresponding FCS files to extract cell population data.

These steps are done separately for samples from each site (MGH and CAP). Once this is done, the results are combined into a single dataset, from which a SummarizedExperiment object is created and exported for further analysis.

Finally, we perform some basic data quality control checks to document the integrity of the data.

Important

The extraction functions will need to be modified to extract the total number of immune cells and to compute the percentages of child populations relative to total immune cells. This will be done later.

MGH Flow data

Step 1: Creating the file inventory

This step parses the MGH flow cytometry data directory structure to create an inventory of the WSP and FCS files that are relevant for the analysis. It looks for WSP files that contain “068” in their names and matches them with corresponding FCS files in the same directory.

Code
# Root of the MGH raw data and destination of the step 1 outputs
base_dir <- flow_dir |> str_c("MGH/")
out_dir <- output_dir |> str_c("MGH/step 01")

# Make sure the step 1 output folder exists (parents included)
if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE)

# Recursively collect every workspace (.wsp) file below the MGH root
all_wsp_files <- list.files(
  path = base_dir,
  pattern = "\\.wsp$",
  full.names = TRUE,
  recursive = TRUE
)
cat("Total WSP files found:", length(all_wsp_files), "\n")
Total WSP files found: 84 
Code
# Keep only the workspaces whose path mentions "068".
# The match runs on the full path, so a "068" in a folder name also counts.
relevant_wsp_files <- grep("068", all_wsp_files, value = TRUE)
cat("WSP files related to 068 samples:", length(relevant_wsp_files), "\n")
WSP files related to 068 samples: 84 
Code
# Pair every relevant WSP file with one FCS file located in the same folder.
# Candidates are searched in decreasing order of strictness:
#   1. "Found"                - FCS name follows the MGH_Cyto_068* convention
#   2. "Found (relaxed)"      - FCS name contains 068 and also MGH or Cyto
#   3. "Found (very relaxed)" - FCS name merely contains 068
# A WSP file whose folder holds no candidate is reported and left out of the
# two result vectors, which are kept aligned (same length, same order).
valid_wsp_files <- character(0)
valid_fcs_files <- character(0)

for (wsp_file in relevant_wsp_files) {
  wsp_dir <- dirname(wsp_file)
  
  # Strict search; list.files() applies `pattern` to file names, not to paths
  matching_fcs <- list.files(wsp_dir, pattern = "MGH_Cyto_068.*\\.fcs$", full.names = TRUE)
  
  if (length(matching_fcs) > 0) {
    # Take the first match (should typically be only one per folder)
    valid_wsp_files <- c(valid_wsp_files, wsp_file)
    valid_fcs_files <- c(valid_fcs_files, matching_fcs[1])
    
    cat("Found: WSP:", basename(wsp_file), "→ FCS:", basename(matching_fcs[1]), "\n")
  } else {
    # Try a more relaxed pattern if the strict one doesn't match
    relaxed_fcs <- list.files(wsp_dir, pattern = ".*068.*\\.fcs$", full.names = TRUE)
    
    if (length(relaxed_fcs) > 0) {
      # Prefer files with MGH or Cyto in the *file name*. The test must use
      # basename(): the full paths all contain "MGH" through their parent
      # folders, so testing the path would accept every candidate and make the
      # "very relaxed" branch below unreachable.
      mgh_matches <- relaxed_fcs[grepl("MGH|Cyto", basename(relaxed_fcs))]
      if (length(mgh_matches) > 0) {
        valid_wsp_files <- c(valid_wsp_files, wsp_file)
        valid_fcs_files <- c(valid_fcs_files, mgh_matches[1])
        
        cat("Found (relaxed): WSP:", basename(wsp_file), "→ FCS:", basename(mgh_matches[1]), "\n")
      } else {
        # Last resort: first FCS file of the folder that mentions 068
        valid_wsp_files <- c(valid_wsp_files, wsp_file)
        valid_fcs_files <- c(valid_fcs_files, relaxed_fcs[1])
        
        cat("Found (very relaxed): WSP:", basename(wsp_file), "→ FCS:", basename(relaxed_fcs[1]), "\n")
      }
    } else {
      cat("No matching FCS found for:", basename(wsp_file), "in", wsp_dir, "\n")
    }
  }
}
Found: WSP: 068_10_0004_V1_1000.wsp → FCS: MGH_Cyto_068_10_0004_V1_1000.fcs 
Found: WSP: 068_10_0004_1100.wsp → FCS: MGH_Cyto_068_10_0004_V2_1100.fcs 
Found: WSP: 068_10_0004_1500.wsp → FCS: MGH_Cyto_068_10_0004_V6_1500.fcs 
Found: WSP: 068_10_0004_2120.wsp → FCS: MGH_Cyto_068_10_0004_V9_2120.fcs 
Found: WSP: 068_10_0006_1000.wsp → FCS: MGH_Cyto_068_10_0006_V1_1000.fcs 
Found: WSP: 068_10_0006_1100.wsp → FCS: MGH_Cyto_068_10_0006_V2_1100.fcs 
Found: WSP: 068_10_0006_1500.wsp → FCS: MGH_Cyto_068_10_0006_V6_1500.fcs 
Found: WSP: 068_10_0006_2120.wsp → FCS: MGH_Cyto_068_10_0006_V9_2120.fcs 
Found: WSP: 068_10_0010_1000.wsp → FCS: MGH_Cyto_068_10_0010_V1_1000.fcs 
Found: WSP: 068_10_0010_1200.wsp → FCS: MGH_Cyto_068_10_0010_V3_1200.fcs 
Found: WSP: 068_10_0010_1500.wsp → FCS: MGH_Cyto_068_10_0010_V6_1500.fcs 
Found: WSP: 068_10_0010_2120.wsp → FCS: MGH_Cyto_068_10_0010_V9_2120.fcs 
Found: WSP: 068_10_0011_1000.wsp → FCS: MGH_Cyto_068_10_0011_V1_1000.fcs 
Found: WSP: 068_10_0011_1100.wsp → FCS: MGH_Cyto_068_10_0011_V2_1100.fcs 
Found: WSP: 068_10_0011_1500.wsp → FCS: MGH_Cyto_068_10_0011_V6_1500.fcs 
Found: WSP: 068_10_0011_2120.wsp → FCS: MGH_Cyto_068_10_0011_V9_2120.fcs 
Found: WSP: 068_10_0013_1000.wsp → FCS: MGH_Cyto_068_10_0013_V1_1000.fcs 
Found: WSP: 068_10_0013_1100.wsp → FCS: MGH_Cyto_068_10_0013_V2_1100.fcs 
Found: WSP: 068_10_0016_1000.wsp → FCS: MGH_Cyto_068_10_0016_V1_1000.fcs 
Found: WSP: 068_10_0026_1000.wsp → FCS: MGH_Cyto_068_10_0026_V1_1000.fcs 
Found: WSP: 068_10_0026_1100.wsp → FCS: MGH_Cyto_068_10_0026_V2_1100.fcs 
Found: WSP: 068_10_0026_V6_1500.wsp → FCS: MGH_Cyto_068_10_0026_V6_1500.fcs 
Found: WSP: 068_10_0026_V9_2120.wsp → FCS: MGH_Cyto_068_10_0026_V9_2120.fcs 
Found: WSP: 068_10_0029_1000.wsp → FCS: MGH_Cyto_068_10_0029_V1_1000.fcs 
Found: WSP: 068_10_0029_1100.wsp → FCS: MGH_Cyto_068_10_0029_V2_1100.fcs 
Found: WSP: 068_10_0029-1500.wsp → FCS: MGH_Cyto_068_10_0029_V6_1500.fcs 
Found: WSP: 068_10_0029_V9_2120.wsp → FCS: MGH_Cyto_068_10_0029_V9_2120.fcs 
Found: WSP: 068_10_0036_1000.wsp → FCS: MGH_Cyto_068_10_0036_V1_1000.fcs 
Found: WSP: 068_10_0036_1100.wsp → FCS: MGH_Cyto_068_10_0036_V2_1100.fcs 
Found: WSP: 068_10_0036_V6_1500.wsp → FCS: MGH_Cyto_068_10_0036_V6_1500.fcs 
Found: WSP: 068_10_0036_V9_2120.wsp → FCS: MGH_Cyto_068_10_0036_V9_2120.fcs 
Found: WSP: 068_10_0040_1000.wsp → FCS: MGH_Cyto_068_10_0040_V1_1000.fcs 
Found: WSP: 068_10_0040_V2_1100.wsp → FCS: MGH_Cyto_068_10_0040_V2_1100.fcs 
Found: WSP: 068_10_0040_V6_1500.wsp → FCS: MGH_Cyto_068_10_0040_V6_1500.fcs 
Found (relaxed): WSP: 068_10_0040_V9_2120.wsp → FCS: 03142024_068-10-00040 V9.fcs 
Found: WSP: 068_10_0044_V1_1000.wsp → FCS: MGH_Cyto_068_10_0044_V1_1000.fcs 
Found: WSP: 068_10_0044_1100.wsp → FCS: MGH_Cyto_068_10_0044_V2_1100.fcs 
Found: WSP: 068_10_0044_V6_1500.wsp → FCS: MGH_Cyto_068_10_0044_V6_1500.fcs 
Found: WSP: 068_10_0044_V9_2120.wsp → FCS: MGH_Cyto_068_10_0044_V9_2120.fcs 
Found: WSP: 068_10_0045_1000.wsp → FCS: MGH_Cyto_068_10_0045_V1_1000.fcs 
Found: WSP: 068_10_0045_V2_1100.wsp → FCS: MGH_Cyto_068_10_0045_V2_1100.fcs 
Found: WSP: 068_10_0046_V1_1000.wsp → FCS: MGH_Cyto_068_10_0046_V1_1000.fcs 
Found: WSP: 068_10_0046_1100.wsp → FCS: MGH_Cyto_068_10_0046_V2_1100.fcs 
Found: WSP: 068_10_0046_V6_1500.wsp → FCS: MGH_Cyto_068_10_0046_V6_1500.fcs 
Found: WSP: 068_10_0046_V9_2120.wsp → FCS: MGH_Cyto_068_10_0046_V9_2120.fcs 
Found: WSP: 068_10_0049_V1_1000.wsp → FCS: MGH_Cyto_068_10_0049_V1_1000.fcs 
Found: WSP: 068_10_0049_V2_1100.wsp → FCS: MGH_Cyto_068_10_0049_V2_1100.fcs 
Found: WSP: 068_10_0049_V6_1500.wsp → FCS: MGH_Cyto_068_10_0049_V6_1500.fcs 
Found: WSP: 068_10_0049_V9_2120.wsp → FCS: MGH_Cyto_068_10_0049_V9_2120.fcs 
Found: WSP: 068_10_0050_V1_1000.wsp → FCS: MGH_Cyto_068_10_0050_V1_1000.fcs 
Found: WSP: 068_10_0050_V2_1100.wsp → FCS: MGH_Cyto_068_10_0050_V2_1100.fcs 
Found: WSP: 068_10_0050_V7_1700.wsp → FCS: MGH_Cyto_068_10_0050_V7_1700.fcs 
Found: WSP: 068_10_0052_V1_1000.wsp → FCS: MGH_Cyto_068_10_0052_V1_1000.fcs 
Found: WSP: 068_10_0052_V2_1100.wsp → FCS: MGH_Cyto_068_10_0052_V2_1100.fcs 
Found: WSP: 068_10_0052_V6_1500.wsp → FCS: MGH_Cyto_068_10_0052_V6_1500.fcs 
Found: WSP: 068_10_0052_V9_2120.wsp → FCS: MGH_Cyto_068_10_0052_V9_2120.fcs 
Found: WSP: 068_10_0053_V1_1000.wsp → FCS: MGH_Cyto_068_10_0053_V1_1000.fcs 
Found: WSP: 068_10_0053_V6_1500.wsp → FCS: MGH_Cyto_068_10_0053_V6_1500.fcs 
Found: WSP: 068_10_0053_V9_2120.wsp → FCS: MGH_Cyto_068_10_0053_V9_007.fcs 
Found: WSP: 068_10_0054_V1_1000.wsp → FCS: MGH_Cyto_068_10_0054_V1_1000.fcs 
Found: WSP: 068_10_0054_V2_1100.wsp → FCS: MGH_Cyto_068_10_0054_V2_1100.fcs 
Found: WSP: 068_10_0054_V6_1500.wsp → FCS: MGH_Cyto_068_10_0054_V6_1500.fcs 
Found: WSP: 068_10_0054_V9_2120.wsp → FCS: MGH_Cyto_068_10_0054_V9_2120.fcs 
Found: WSP: 068_10_0056_1000.wsp → FCS: MGH_Cyto_068_10_0056_V1_1000.fcs 
Found: WSP: 068_10_0058_V1_1000.wsp → FCS: MGH_Cyto_068_10_0058_V1_1000.fcs 
Found: WSP: 068_10_0058_V2_1100.wsp → FCS: MGH_Cyto_068_10_0058_V2_1100.fcs 
Found (relaxed): WSP: 068_10_0058_V6_1500.wsp → FCS: 03142024_068-10-0058 V6.fcs 
Found: WSP: 068_10_0058_V9_2120.wsp → FCS: MGH_Cyto_068_10_0058_V9_2120.fcs 
Found: WSP: 068_10_0060_V1_1000.wsp → FCS: MGH_Cyto_068_10_0060_V1_1000.fcs 
Found: WSP: 068_10_0060_V2_1100.wsp → FCS: MGH_Cyto_068_10_0060_V2_1100.fcs 
Found: WSP: 068_10_0060_V6_1500.wsp → FCS: MGH_Cyto_068_10_0060_V6_1500.fcs 
Found: WSP: 068_10_0060_V9_2120.wsp → FCS: MGH_Cyto_068_10_0060_V9_2120.fcs 
Found: WSP: 068_10_0061_V1_1000.wsp → FCS: MGH_Cyto_068_10_0061_V1_1000.fcs 
Found: WSP: 068_10_0061_V2_1100.wsp → FCS: MGH_Cyto_068_10_0061_V2_1100.fcs 
Found: WSP: 068_10_0061_V6_1500.wsp → FCS: MGH_Cyto_068_10_0061_V6_1500.fcs 
Found: WSP: 068_10_0061_V9_2120.wsp → FCS: MGH_Cyto_068_10_0061_V9_2120.fcs 
Found: WSP: 068_10_0062_V1_1000.wsp → FCS: MGH_Cyto_068_10_0062_V1_1000.fcs 
Found: WSP: 068_10_0062_V2_1100.wsp → FCS: MGH_Cyto_068_10_0062_V2_1100.fcs 
Found: WSP: 068_10_0062_V6_1500.wsp → FCS: MGH_Cyto_068_10_0062_V6_1500.fcs 
Found: WSP: 068_10_0062_V9_2120.wsp → FCS: MGH_Cyto_068_10_0062_V9_2120.fcs 
Found: WSP: 068_10_0063_V1_1000.wsp → FCS: MGH_Cyto_068_10_0063_V1_1000.fcs 
Found: WSP: 068_10_0063_V2_1100.wsp → FCS: MGH_Cyto_068_10_0063_V2_1100.fcs 
Found (relaxed): WSP: 068_10_0063_V6_1500.wsp → FCS: 03142024_068-10-00063_V6_1500.fcs 
Found: WSP: 068_10_0063_V9_2120.wsp → FCS: MGH_Cyto_068_10_0063_V9_2120.fcs 
Code
# Assemble the inventory: one row per WSP/FCS pair, then add the folder that
# holds the pair
inventory <- data.frame(
  wsp_file = valid_wsp_files,
  fcs_file = valid_fcs_files,
  stringsAsFactors = FALSE
)
inventory$folder <- dirname(inventory$wsp_file)

# Report how many pairs were retained
cat("\n\nFound", nrow(inventory), "valid samples with both WSP and FCS files\n")


Found 84 valid samples with both WSP and FCS files
Code
# Preview the top of the inventory, unless it is empty
if (nrow(inventory) > 0) {
  inventory |> head() |> print()
}
                                                                                                                                                             wsp_file
1 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0004_V1_1000/068_10_0004_V1_1000.wsp
2    /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0004_V2_1100/068_10_0004_1100.wsp
3    /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0004_V6_1500/068_10_0004_1500.wsp
4    /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0004_V9_2120/068_10_0004_2120.wsp
5    /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0006_V1_1000/068_10_0006_1000.wsp
6    /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0006_V2_1100/068_10_0006_1100.wsp
                                                                                                                                                                      fcs_file
1 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0004_V1_1000/MGH_Cyto_068_10_0004_V1_1000.fcs
2 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0004_V2_1100/MGH_Cyto_068_10_0004_V2_1100.fcs
3 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0004_V6_1500/MGH_Cyto_068_10_0004_V6_1500.fcs
4 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0004_V9_2120/MGH_Cyto_068_10_0004_V9_2120.fcs
5 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0006_V1_1000/MGH_Cyto_068_10_0006_V1_1000.fcs
6 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0006_V2_1100/MGH_Cyto_068_10_0006_V2_1100.fcs
                                                                                                                                       folder
1 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0004_V1_1000
2 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0004_V2_1100
3 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0004_V6_1500
4 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0004_V9_2120
5 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0006_V1_1000
6 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/MGH//MGH_068_10_0006_V2_1100
Code
# Persist the step 1 results: the two path vectors as RDS, the table as CSV
saveRDS(valid_wsp_files, file = file.path(out_dir, "valid_wsp_files.rds"))
saveRDS(valid_fcs_files, file = file.path(out_dir, "valid_fcs_files.rds"))
write.csv(
  inventory,
  file = file.path(out_dir, "mgh_sample_inventory.csv"),
  row.names = FALSE
)

cat("\nInventory saved to:", file.path(out_dir, "mgh_sample_inventory.csv"), "\n")

Inventory saved to: /Users/laurasymul/OneDrive - UCL/Academia/Research/VIBRANT data UCLouvain/actual data/01 Preprocessed and QCed/06 Flow cytometry tmp files/MGH/step 01/mgh_sample_inventory.csv 
Code
# Report where the vector of WSP file paths was written
cat("WSP file vector saved to:", file.path(out_dir, "valid_wsp_files.rds"), "\n")
WSP file vector saved to: /Users/laurasymul/OneDrive - UCL/Academia/Research/VIBRANT data UCLouvain/actual data/01 Preprocessed and QCed/06 Flow cytometry tmp files/MGH/step 01/valid_wsp_files.rds 
Code
# Report where the vector of FCS file paths was written
cat("FCS file vector saved to:", file.path(out_dir, "valid_fcs_files.rds"), "\n")
FCS file vector saved to: /Users/laurasymul/OneDrive - UCL/Academia/Research/VIBRANT data UCLouvain/actual data/01 Preprocessed and QCed/06 Flow cytometry tmp files/MGH/step 01/valid_fcs_files.rds 
Code
# cat(" There are in fact 86 sample folders in the MGH directory \n")

# Drop the step 1 working objects (results and loop temporaries) so that they
# cannot be picked up by mistake in the following steps
rm(list = c(
  "inventory", "valid_fcs_files", "valid_wsp_files",
  "all_wsp_files", "relevant_wsp_files",
  "wsp_file", "wsp_dir", "matching_fcs", "relaxed_fcs", "mgh_matches"
))

Step 2: Verify file data

Code
# Set the output directory for step 2
out_dir <- str_c(output_dir, "MGH/step 02")

# Create the output directory if it doesn't exist. recursive = TRUE (as for
# the step 1 folder) so that this also succeeds when the parent "MGH/" folder
# has not been created yet, e.g. when step 2 is run on its own.
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}

For MGH samples, we need to load the list of samples along with the machine that was used for each sample. This is provided in the MGH_sample_list_with_machine_no_dates.csv file, which contains the expected visit codes and sample IDs.

Code
# Load the list of expected MGH samples (Subject, Visit and Machine columns)
# and preview its first rows
mgh_samples <-
  get_data_dir() |>
  str_c("06 Flow cytometry/MGH_sample_list_with_machine_no_dates.csv") |>
  read_csv()
head(mgh_samples)
# A tibble: 6 × 3
  Subject     Visit Machine
  <chr>       <chr> <chr>  
1 068-10-0004 V1    5R     
2 068-10-0004 V2    5R     
3 068-10-0006 V1    SORP   
4 068-10-0006 V2    SORP   
5 068-10-0004 V6    SORP   
6 068-10-0006 V6    SORP   
Code
# Reload the step 1 inventory from disk, stripping stray whitespace in fields
inventory <-
  output_dir |>
  str_c("MGH/step 01/mgh_sample_inventory.csv") |>
  read.csv(stringsAsFactors = FALSE, strip.white = TRUE)

# Start by inspecting the raw Visit values of the expected sample list
cat("Unique Visit values in the data:\n")
Unique Visit values in the data:
Code
# List the distinct raw Visit labels; some carry trailing whitespace, which is
# why Visit is standardized before any matching is attempted
print(unique(mgh_samples$Visit))
 [1] "V1 " "V2 " "V6 " "V3"  "V9 " "V9"  "V1"  "V2"  "V6"  "V7" 
Code
# Build the table of expected samples in three passes: clean the identifiers,
# derive the underscore ID and the visit code, then assemble the expected name.
# process_visit() and get_visit_code() are project helpers defined elsewhere.
mgh_expected <-
  mgh_samples |>
  mutate(
    # Subject with every whitespace character removed
    Subject = gsub("\\s+", "", trimws(Subject)),
    # Keep the raw Visit for reference before standardizing it
    Visit_orig = Visit,
    Visit = sapply(Visit, process_visit)
  ) |>
  mutate(
    # Underscore-separated version of the subject ID
    sample_id = gsub("-", "_", Subject),
    # Code associated with the standardized visit
    visit_code = sapply(Visit, get_visit_code)
  ) |>
  mutate(
    # Expected name: MGH_<sample_id>_<Visit>_<visit_code>
    expected_pattern = paste0("MGH_", sample_id, "_", Visit, "_", visit_code)
  )

# Show the outcome of the Visit standardization
cat("\nSample of Visit standardization:\n")

Sample of Visit standardization:
Code
# First rows of the raw vs standardized Visit, with the derived visit code
mgh_expected |>
  select(Subject, Visit_orig, Visit, visit_code) |>
  head() |>
  print()
# A tibble: 6 × 4
  Subject     Visit_orig Visit visit_code
  <chr>       <chr>      <chr> <chr>     
1 068-10-0004 V1         V1    1000      
2 068-10-0004 V2         V2    1100      
3 068-10-0006 V1         V1    1000      
4 068-10-0006 V2         V2    1100      
5 068-10-0004 V6         V6    1500      
6 068-10-0006 V6         V6    1500      
Code
# Derive a sample identifier from each path column of the inventory with the
# project helper extract_sample_info() (applied to whole columns), then
# concatenate the three identifiers into a single "|"-separated field
inventory_processed <-
  inventory |>
  mutate(
    folder_id = extract_sample_info(folder),
    wsp_id = extract_sample_info(wsp_file),
    fcs_id = extract_sample_info(fcs_file),
    # All identifiers together, for maximum matching potential
    all_ids = paste(folder_id, wsp_id, fcs_id, sep = "|")
  )

# Show the extracted IDs for verification
cat("Sample of extracted identifiers from inventory:\n")
Sample of extracted identifiers from inventory:
Code
# First rows of the three extracted identifiers, side by side
inventory_processed |>
  select(folder_id, wsp_id, fcs_id) |>
  head() |>
  print()
                folder_id                  wsp_id                  fcs_id
1 MGH_068_10_0004_V1_1000 MGH_068_10_0004_V1_1000 MGH_068_10_0004_V1_1000
2 MGH_068_10_0004_V2_1100 MGH_068_10_0004_V2_1100 MGH_068_10_0004_V2_1100
3 MGH_068_10_0004_V6_1500 MGH_068_10_0004_V6_1500 MGH_068_10_0004_V6_1500
4 MGH_068_10_0004_V9_2120 MGH_068_10_0004_V9_2120 MGH_068_10_0004_V9_2120
5 MGH_068_10_0006_V1_1000 MGH_068_10_0006_V1_1000 MGH_068_10_0006_V1_1000
6 MGH_068_10_0006_V2_1100 MGH_068_10_0006_V2_1100 MGH_068_10_0006_V2_1100
Code
# Find which expected samples are missing from the inventory.
# Each expected sample is checked on its own (rowwise) with the project helper
# check_sample_exists(), which receives the sample ID, the standardized visit,
# the visit code and the processed inventory. Only the rows for which it does
# not report a match are kept.
# NOTE(review): check_sample_exists() is defined outside this chunk; it is
# presumably expected to return a single TRUE/FALSE per sample - confirm.
missing_samples <- 
  mgh_expected |> 
  rowwise() |> 
  mutate(
    found = check_sample_exists(
      sample_id, 
      Visit, 
      visit_code, 
      inventory_processed
    )
  ) |> 
  dplyr::filter(!found) |> 
  ungroup()

# Print summary of results, starting with the size of the inventory
cat("\nInventory sample count:", nrow(inventory), "\n")

Inventory sample count: 84 
Code
# Number of samples listed in the expected sample table
cat("Expected MGH sample count:", nrow(mgh_expected), "\n")
Expected MGH sample count: 84 
Code
# Number of expected samples that were not found in the inventory
cat("Missing samples count:", nrow(missing_samples), "\n")
Missing samples count: 0 
Code
# Header for the listing of missing samples (the table itself is printed next)
cat("\nMissing samples:\n")

Missing samples:
Code
# Identifying columns of the missing samples (empty when nothing is missing)
missing_samples |>
  select(Subject, Visit_orig, Visit, visit_code, expected_pattern) |>
  print()
# A tibble: 0 × 5
# ℹ 5 variables: Subject <chr>, Visit_orig <chr>, Visit <chr>,
#   visit_code <chr>, expected_pattern <chr>
Code
# Keep a CSV record of the expected samples that were not found
write.csv(
  missing_samples,
  file = file.path(out_dir, "missing_mgh_samples.csv"),
  row.names = FALSE
)

# The found samples are the expected ones minus the missing ones, compared on
# the Subject/Visit pair
found_samples <- anti_join(
  mgh_expected,
  missing_samples,
  by = c("Subject", "Visit")
)

cat("\nSample of found samples (first 10):\n")

Sample of found samples (first 10):
Code
# First ten found samples with their expected name
found_samples |>
  select(Subject, Visit_orig, Visit, visit_code, expected_pattern) |>
  head(10) |>
  print()
# A tibble: 10 × 5
   Subject     Visit_orig Visit visit_code expected_pattern       
   <chr>       <chr>      <chr> <chr>      <chr>                  
 1 068-10-0004 V1         V1    1000       MGH_068_10_0004_V1_1000
 2 068-10-0004 V2         V2    1100       MGH_068_10_0004_V2_1100
 3 068-10-0006 V1         V1    1000       MGH_068_10_0006_V1_1000
 4 068-10-0006 V2         V2    1100       MGH_068_10_0006_V2_1100
 5 068-10-0004 V6         V6    1500       MGH_068_10_0004_V6_1500
 6 068-10-0006 V6         V6    1500       MGH_068_10_0006_V6_1500
 7 068-10-0010 V1         V1    1000       MGH_068_10_0010_V1_1000
 8 068-10-0011 V1         V1    1000       MGH_068_10_0011_V1_1000
 9 068-10-0010 V3         V3    1200       MGH_068_10_0010_V3_1200
10 068-10-0011 V2         V2    1100       MGH_068_10_0011_V2_1100
Code
# When some samples are missing, summarise them per subject: the list of
# missing visits and their number, most affected subjects first
if (nrow(missing_samples) > 0) {
  cat("\nMissing samples by subject:\n")
  missing_samples |>
    group_by(Subject) |>
    summarize(
      visits_missing = paste(Visit, collapse = ", "),
      count = n()
    ) |>
    arrange(desc(count)) |>
    print()
}
Code
# Extract standardized information from the inventory folder names, which
# follow the layout MGH_<subject>_<visit>_<visit code>
inventory_standardized <- 
  inventory |> 
  mutate(
    folder_name = basename(folder),
    # Subject ID in its underscore form (NNN_NN_NNNN)
    subject_id = str_extract(folder_name, "[0-9]{3}_[0-9]{2}_[0-9]{4}"),
    # Visit label: "V" followed by all of its digits, so that a two-digit
    # visit such as V10 is not truncated to V1
    visit = str_extract(folder_name, "V[0-9]+"),
    # Four-digit visit code closing the folder name
    visit_code = str_extract(folder_name, "[0-9]{4}$"),
    # Create a standardized pattern for comparison
    std_pattern = paste0(subject_id, "_", visit, "_", visit_code)
  )

# Print inventory count
cat("Inventory folder count:", nrow(inventory_standardized), "\n")
Inventory folder count: 84 
Code
# Flag each expected sample as matched when at least one inventory folder name
# contains both its sample ID and its visit label (literal, case-insensitive
# substring tests)
matched_samples <-
  mgh_expected |>
  mutate(
    matched_to_inventory = map2_lgl(
      sample_id,
      Visit,
      \(id, v) {
        has_id <- str_detect(
          inventory_standardized$folder_name,
          fixed(id, ignore_case = TRUE)
        )
        has_visit <- str_detect(
          inventory_standardized$folder_name,
          fixed(v, ignore_case = TRUE)
        )
        any(has_id & has_visit)
      }
    )
  )

# Count the matched samples
matched_count <- sum(matched_samples$matched_to_inventory)
cat("Matched samples count:", matched_count, "\n")
Matched samples count: 84 
Code
# Prepare a direct comparison between the inventory folder names and the
# expected patterns: for every folder, build three name variants of
# decreasing specificity
inventory_folders <-
  inventory_standardized |>
  mutate(
    # <subject>_<visit>
    pattern3 = paste0(subject_id, "_", visit),
    # <subject>_<visit>_<visit code>
    pattern2 = paste0(pattern3, "_", visit_code),
    # MGH_<subject>_<visit>_<visit code>
    pattern1 = paste0("MGH_", pattern2)
  ) |>
  select(folder, folder_name, pattern1, pattern2, pattern3)

# Header for the full listing of folder names
cat("\n=== All Inventory Folder Names ===\n")

=== All Inventory Folder Names ===
Code
# Print every inventory folder name, numbered from 1.
# seq_len() is used instead of 1:nrow() so that nothing is printed for an
# empty inventory (1:0 would iterate over 1 and 0).
for (i in seq_len(nrow(inventory_folders))) {
  cat(i, ": ", inventory_folders$folder_name[i], "\n", sep="")
}
1: MGH_068_10_0004_V1_1000
2: MGH_068_10_0004_V2_1100
3: MGH_068_10_0004_V6_1500
4: MGH_068_10_0004_V9_2120
5: MGH_068_10_0006_V1_1000
6: MGH_068_10_0006_V2_1100
7: MGH_068_10_0006_V6_1500
8: MGH_068_10_0006_V9_2120
9: MGH_068_10_0010_V1_1000
10: MGH_068_10_0010_V3_1200
11: MGH_068_10_0010_V6_1500
12: MGH_068_10_0010_V9_2120
13: MGH_068_10_0011_V1_1000
14: MGH_068_10_0011_V2_1100
15: MGH_068_10_0011_V6_1500
16: MGH_068_10_0011_V9_2120
17: MGH_068_10_0013_V1_1000
18: MGH_068_10_0013_V2_1100
19: MGH_068_10_0016_V1_1000
20: MGH_068_10_0026_V1_1000
21: MGH_068_10_0026_V2_1100
22: MGH_068_10_0026_V6_1500
23: MGH_068_10_0026_V9_2120
24: MGH_068_10_0029_V1_1000
25: MGH_068_10_0029_V2_1100
26: MGH_068_10_0029_V6_1500
27: MGH_068_10_0029_V9_2120
28: MGH_068_10_0036_V1_1000
29: MGH_068_10_0036_V2_1100
30: MGH_068_10_0036_V6_1500
31: MGH_068_10_0036_V9_2120
32: MGH_068_10_0040_V1_1000
33: MGH_068_10_0040_V2_1100
34: MGH_068_10_0040_V6_1500
35: MGH_068_10_0040_V9_2120
36: MGH_068_10_0044_V1_1000
37: MGH_068_10_0044_V2_1100
38: MGH_068_10_0044_V6_1500
39: MGH_068_10_0044_V9_2120
40: MGH_068_10_0045_V1_1000
41: MGH_068_10_0045_V2_1100
42: MGH_068_10_0046_V1_1000
43: MGH_068_10_0046_V2_1100
44: MGH_068_10_0046_V6_1500
45: MGH_068_10_0046_V9_2120
46: MGH_068_10_0049_V1_1000
47: MGH_068_10_0049_V2_1100
48: MGH_068_10_0049_V6_1500
49: MGH_068_10_0049_V9_2120
50: MGH_068_10_0050_V1_1000
51: MGH_068_10_0050_V2_1100
52: MGH_068_10_0050_V7_1700
53: MGH_068_10_0052_V1_1000
54: MGH_068_10_0052_V2_1100
55: MGH_068_10_0052_V6_1500
56: MGH_068_10_0052_V9_2120
57: MGH_068_10_0053_V1_1000
58: MGH_068_10_0053_V6_1500
59: MGH_068_10_0053_V9_2120
60: MGH_068_10_0054_V1_1000
61: MGH_068_10_0054_V2_1100
62: MGH_068_10_0054_V6_1500
63: MGH_068_10_0054_V9_2120
64: MGH_068_10_0056_V1_1000
65: MGH_068_10_0058_V1_1000
66: MGH_068_10_0058_V2_1100
67: MGH_068_10_0058_V6_1500
68: MGH_068_10_0058_V9_2120
69: MGH_068_10_0060_V1_1000
70: MGH_068_10_0060_V2_1100
71: MGH_068_10_0060_V6_1500
72: MGH_068_10_0060_V9_2120
73: MGH_068_10_0061_V1_1000
74: MGH_068_10_0061_V2_1100
75: MGH_068_10_0061_V6_1500
76: MGH_068_10_0061_V9_2120
77: MGH_068_10_0062_V1_1000
78: MGH_068_10_0062_V2_1100
79: MGH_068_10_0062_V6_1500
80: MGH_068_10_0062_V9_2120
81: MGH_068_10_0063_V1_1000
82: MGH_068_10_0063_V2_1100
83: MGH_068_10_0063_V6_1500
84: MGH_068_10_0063_V9_2120
Code
# Header for the full listing of expected sample patterns, printed next for
# side-by-side comparison with the inventory folder names
cat("\n=== All Expected Sample Patterns ===\n")

=== All Expected Sample Patterns ===
Code
# Print every expected pattern with its Subject and Visit, numbered from 1.
# seq_len() is used instead of 1:nrow() so that nothing is printed for an
# empty sample list (1:0 would iterate over 1 and 0).
for (i in seq_len(nrow(mgh_expected))) {
  cat(i, ": ", mgh_expected$expected_pattern[i], " (", mgh_expected$Subject[i], ", ", mgh_expected$Visit[i], ")\n", sep="")
}
1: MGH_068_10_0004_V1_1000 (068-10-0004, V1)
2: MGH_068_10_0004_V2_1100 (068-10-0004, V2)
3: MGH_068_10_0006_V1_1000 (068-10-0006, V1)
4: MGH_068_10_0006_V2_1100 (068-10-0006, V2)
5: MGH_068_10_0004_V6_1500 (068-10-0004, V6)
6: MGH_068_10_0006_V6_1500 (068-10-0006, V6)
7: MGH_068_10_0010_V1_1000 (068-10-0010, V1)
8: MGH_068_10_0011_V1_1000 (068-10-0011, V1)
9: MGH_068_10_0010_V3_1200 (068-10-0010, V3)
10: MGH_068_10_0011_V2_1100 (068-10-0011, V2)
11: MGH_068_10_0004_V9_2120 (068-10-0004, V9)
12: MGH_068_10_0010_V6_1500 (068-10-0010, V6)
13: MGH_068_10_0011_V6_1500 (068-10-0011, V6)
14: MGH_068_10_0013_V1_1000 (068-10-0013, V1)
15: MGH_068_10_0006_V9_2120 (068-10-0006, V9)
16: MGH_068_10_0016_V1_1000 (068-10-0016, V1)
17: MGH_068_10_0013_V2_1100 (068-10-0013, V2)
18: MGH_068_10_0010_V9_2120 (068-10-0010, V9)
19: MGH_068_10_0011_V9_2120 (068-10-0011, V9)
20: MGH_068_10_0026_V1_1000 (068-10-0026, V1)
21: MGH_068_10_0026_V2_1100 (068-10-0026, V2)
22: MGH_068_10_0029_V1_1000 (068-10-0029, V1)
23: MGH_068_10_0029_V2_1100 (068-10-0029, V2)
24: MGH_068_10_0026_V6_1500 (068-10-0026, V6)
25: MGH_068_10_0029_V6_1500 (068-10-0029, V6)
26: MGH_068_10_0036_V1_1000 (068-10-0036, V1)
27: MGH_068_10_0036_V2_1100 (068-10-0036, V2)
28: MGH_068_10_0040_V1_1000 (068-10-0040, V1)
29: MGH_068_10_0040_V2_1100 (068-10-0040, V2)
30: MGH_068_10_0026_V9_2120 (068-10-0026, V9)
31: MGH_068_10_0045_V1_1000 (068-10-0045, V1)
32: MGH_068_10_0046_V1_1000 (068-10-0046, V1)
33: MGH_068_10_0044_V1_1000 (068-10-0044, V1)
34: MGH_068_10_0045_V2_1100 (068-10-0045, V2)
35: MGH_068_10_0050_V1_1000 (068-10-0050, V1)
36: MGH_068_10_0044_V2_1100 (068-10-0044, V2)
37: MGH_068_10_0046_V2_1100 (068-10-0046, V2)
38: MGH_068_10_0050_V2_1100 (068-10-0050, V2)
39: MGH_068_10_0029_V9_2120 (068-10-0029, V9)
40: MGH_068_10_0049_V1_1000 (068-10-0049, V1)
41: MGH_068_10_0036_V6_1500 (068-10-0036, V6)
42: MGH_068_10_0056_V1_1000 (068-10-0056, V1)
43: MGH_068_10_0052_V1_1000 (068-10-0052, V1)
44: MGH_068_10_0054_V1_1000 (068-10-0054, V1)
45: MGH_068_10_0049_V2_1100 (068-10-0049, V2)
46: MGH_068_10_0053_V1_1000 (068-10-0053, V1)
47: MGH_068_10_0058_V1_1000 (068-10-0058, V1)
48: MGH_068_10_0054_V2_1100 (068-10-0054, V2)
49: MGH_068_10_0060_V1_1000 (068-10-0060, V1)
50: MGH_068_10_0040_V6_1500 (068-10-0040, V6)
51: MGH_068_10_0052_V2_1100 (068-10-0052, V2)
52: MGH_068_10_0061_V1_1000 (068-10-0061, V1)
53: MGH_068_10_0062_V1_1000 (068-10-0062, V1)
54: MGH_068_10_0058_V2_1100 (068-10-0058, V2)
55: MGH_068_10_0060_V2_1100 (068-10-0060, V2)
56: MGH_068_10_0062_V2_1100 (068-10-0062, V2)
57: MGH_068_10_0046_V6_1500 (068-10-0046, V6)
58: MGH_068_10_0063_V1_1000 (068-10-0063, V1)
59: MGH_068_10_0061_V2_1100 (068-10-0061, V2)
60: MGH_068_10_0044_V6_1500 (068-10-0044, V6)
61: MGH_068_10_0063_V2_1100 (068-10-0063, V2)
62: MGH_068_10_0049_V6_1500 (068-10-0049, V6)
63: MGH_068_10_0052_V6_1500 (068-10-0052, V6)
64: MGH_068_10_0050_V7_1700 (068-10-0050, V7)
65: MGH_068_10_0053_V6_1500 (068-10-0053, V6)
66: MGH_068_10_0054_V6_1500 (068-10-0054, V6)
67: MGH_068_10_0036_V9_2120 (068-10-0036, V9)
68: MGH_068_10_0060_V6_1500 (068-10-0060, V6)
69: MGH_068_10_0061_V6_1500 (068-10-0061, V6)
70: MGH_068_10_0062_V6_1500 (068-10-0062, V6)
71: MGH_068_10_0058_V6_1500 (068-10-0058, V6)
72: MGH_068_10_0063_V6_1500 (068-10-0063, V6)
73: MGH_068_10_0040_V9_2120 (068-10-0040, V9)
74: MGH_068_10_0044_V9_2120 (068-10-0044, V9)
75: MGH_068_10_0046_V9_2120 (068-10-0046, V9)
76: MGH_068_10_0054_V9_2120 (068-10-0054, V9)
77: MGH_068_10_0049_V9_2120 (068-10-0049, V9)
78: MGH_068_10_0058_V9_2120 (068-10-0058, V9)
79: MGH_068_10_0053_V9_2120 (068-10-0053, V9)
80: MGH_068_10_0060_V9_2120 (068-10-0060, V9)
81: MGH_068_10_0061_V9_2120 (068-10-0061, V9)
82: MGH_068_10_0062_V9_2120 (068-10-0062, V9)
83: MGH_068_10_0052_V9_2120 (068-10-0052, V9)
84: MGH_068_10_0063_V9_2120 (068-10-0063, V9)
Code
# Second approach: take the Subject and Visit parsed from the folder names and
# match them directly to the expected Subject and Visit combinations
inventory_parsed <- 
  inventory_standardized |> 
  mutate(
    # Subject ID in its underscore form (NNN_NN_NNNN)
    subject_base = str_extract(subject_id, "[0-9]{3}_[0-9]{2}_[0-9]{4}"),
    # Convert to the format with hyphens for direct comparison
    subject_with_hyphens = gsub("_", "-", subject_base)
  )

# Cross every inventory row with every expected row and keep the pairs that
# agree on both subject and visit. seq_len() is used instead of 1:nrow() so
# that an empty table yields an empty grid rather than the indices 1 and 0.
comparison <- 
  expand.grid(
    inventory_idx = seq_len(nrow(inventory_parsed)),
    expected_idx = seq_len(nrow(mgh_expected)),
    stringsAsFactors = FALSE
  ) |> 
  mutate(
    inventory_subject = inventory_parsed$subject_with_hyphens[inventory_idx],
    inventory_visit = inventory_parsed$visit[inventory_idx],
    expected_subject = mgh_expected$Subject[expected_idx],
    expected_visit = mgh_expected$Visit[expected_idx],
    # Check if this is a match
    is_match = (inventory_subject == expected_subject) & (inventory_visit == expected_visit)
  ) |> 
  dplyr::filter(is_match)

# Count the number of unique inventory folders and expected samples that have matches
unique_inventory_matched <- length(unique(comparison$inventory_idx))
unique_expected_matched <- length(unique(comparison$expected_idx))

cat("\n=== Direct Matching Results ===\n")

=== Direct Matching Results ===
Code
# Report how many inventory folders are involved in at least one match
cat(
  "Unique inventory folders matched:", unique_inventory_matched,
  "out of", nrow(inventory), "\n"
)
Unique inventory folders matched: 84 out of 84 
Code
# Report how many expected samples are involved in at least one match
cat(
  "Unique expected samples matched:", unique_expected_matched,
  "out of", nrow(mgh_expected), "\n"
)
Unique expected samples matched: 84 out of 84 
Code
# Row indices of inventory folders that do not appear in any matched pair.
# seq_len() keeps the index set empty when inventory_parsed has no rows
# (1:nrow() would yield c(1, 0) and report phantom unmatched folders).
unmatched_inventory <- setdiff(
  seq_len(nrow(inventory_parsed)),
  unique(comparison$inventory_idx)
)
cat("\nInventory folders with no match in expected samples:", length(unmatched_inventory), "\n")

Inventory folders with no match in expected samples: 0 
Code
# Print one line per inventory folder that has no expected counterpart
if (length(unmatched_inventory) > 0) {
  cat("Unmatched inventory folders:\n")
  for (idx in unmatched_inventory) {
    cat(
      "  ", inventory_parsed$folder_name[idx],
      " (Subject: ", inventory_parsed$subject_with_hyphens[idx],
      ", Visit: ", inventory_parsed$visit[idx], ")\n",
      sep = ""
    )
  }
}

# Row indices of expected samples that do not appear in any matched pair.
# seq_len() keeps the index set empty when mgh_expected has no rows
# (1:nrow() would yield c(1, 0) and report phantom unmatched samples).
unmatched_expected <- setdiff(
  seq_len(nrow(mgh_expected)),
  unique(comparison$expected_idx)
)
cat("\nExpected samples with no match in inventory:", length(unmatched_expected), "\n")

Expected samples with no match in inventory: 0 
Code
# Print one line per expected sample that has no inventory counterpart
if (length(unmatched_expected) > 0) {
  cat("Unmatched expected samples:\n")
  for (idx in unmatched_expected) {
    cat(
      "  ", mgh_expected$expected_pattern[idx],
      " (Subject: ", mgh_expected$Subject[idx],
      ", Visit: ", mgh_expected$Visit[idx], ")\n",
      sep = ""
    )
  }
}

We create a “cleaned” inventory with harmonized pid and visit_code.

Code
# Cleaned inventory: pid is subject_id with the underscores removed, and only
# the columns listed in select() are kept (in that order).
inventory_cleaned <- 
  inventory_standardized |> 
  mutate(pid = str_remove_all(subject_id, "_")) |> 
  select(
    folder_name, pid, subject_id, visit, visit_code,
    folder, wsp_file, fcs_file
  )

In addition, for three V9 samples (from participants 068100061, 068100062, and 068100063), the flow cytometry protocol was run with missing channels (“BV650-CD19 unavailable this day so it was not used”). Since this would cause problems when extracting the gating structure, we record these missing channels in the inventory so that these samples can be processed differently at step 3.

Code
# Flag the V9 samples of the three listed participants with the channel names
# that are absent from their FCS files; every other row gets NA.
inventory_cleaned <- 
  inventory_cleaned |> 
  mutate(
    missing_channels = ifelse(
      test = (visit == "V9") & (pid %in% c("068100061", "068100062", "068100063")),
      yes = "BV650-A | Comp-BV650-A",
      no = NA_character_
    )
  )
Code
# Save the cleaned inventory in the current output directory (no row names)
write.csv(
  x = inventory_cleaned,
  file = str_c(out_dir, "/mgh_sample_inventory_cleaned.csv"),
  row.names = FALSE
)
Code
# Remove the intermediate objects created above before moving on to step 3.
# mgh_samples is not removed here.
rm(
  list = c(
    # inventory tables
    "inventory", "inventory_standardized", "inventory_processed",
    "inventory_parsed", "inventory_folders", "inventory_cleaned",
    # expected-sample tables and matching results
    "mgh_expected", "missing_samples", "found_samples",
    "comparison", "matched_samples",
    # counters and index vectors
    "i", "matched_count",
    "unique_expected_matched", "unique_inventory_matched",
    "unmatched_expected", "unmatched_inventory"
  )
)

Step 3: Extracting gating structure

Code
# Output directory for the MGH step 03 results
out_dir <- str_c(output_dir, "MGH/step 03/")

# Create the output directory if it doesn't exist. recursive = TRUE also
# creates a missing "MGH/" parent, consistent with the other
# directory-creation chunks of this document.
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}
Code
# Process the cleaned MGH inventory written at step 02.
# Workflow: first run with test_mode = TRUE to check a few files; once the
# test looks good, run the full processing with test_mode = FALSE (as here).
results <- 
  process_inventory(
    inventory_file = str_c(
      output_dir, "MGH/step 02/mgh_sample_inventory_cleaned.csv"
    ),
    out_dir = out_dir,
    batch_size = 10,
    test_mode = FALSE
  )

==== Processing Batch 1 of 9 ====
Files 1 to 10 of 84 
Processing file 1 of 10: 068_10_0004_V1_1000.wsp (Participant: 68100004, Visit: 1000)
Successfully processed
Processing file 2 of 10: 068_10_0004_1100.wsp (Participant: 68100004, Visit: 1100)
Successfully processed
Processing file 3 of 10: 068_10_0004_1500.wsp (Participant: 68100004, Visit: 1500)
Successfully processed
Processing file 4 of 10: 068_10_0004_2120.wsp (Participant: 68100004, Visit: 2120)
Successfully processed
Processing file 5 of 10: 068_10_0006_1000.wsp (Participant: 68100006, Visit: 1000)
Successfully processed
Processing file 6 of 10: 068_10_0006_1100.wsp (Participant: 68100006, Visit: 1100)
Successfully processed
Processing file 7 of 10: 068_10_0006_1500.wsp (Participant: 68100006, Visit: 1500)
Successfully processed
Processing file 8 of 10: 068_10_0006_2120.wsp (Participant: 68100006, Visit: 2120)
Successfully processed
Processing file 9 of 10: 068_10_0010_1000.wsp (Participant: 68100010, Visit: 1000)
Successfully processed
Processing file 10 of 10: 068_10_0010_1200.wsp (Participant: 68100010, Visit: 1200)
Successfully processed
Saved batch results to gating_results_batch_1_to_10.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 2 of 9 ====
Files 11 to 20 of 84 
Processing file 11 of 20: 068_10_0010_1500.wsp (Participant: 68100010, Visit: 1500)
Successfully processed
Processing file 12 of 20: 068_10_0010_2120.wsp (Participant: 68100010, Visit: 2120)
Successfully processed
Processing file 13 of 20: 068_10_0011_1000.wsp (Participant: 68100011, Visit: 1000)
Successfully processed
Processing file 14 of 20: 068_10_0011_1100.wsp (Participant: 68100011, Visit: 1100)
Successfully processed
Processing file 15 of 20: 068_10_0011_1500.wsp (Participant: 68100011, Visit: 1500)
Successfully processed
Processing file 16 of 20: 068_10_0011_2120.wsp (Participant: 68100011, Visit: 2120)
Successfully processed
Processing file 17 of 20: 068_10_0013_1000.wsp (Participant: 68100013, Visit: 1000)
Successfully processed
Processing file 18 of 20: 068_10_0013_1100.wsp (Participant: 68100013, Visit: 1100)
Successfully processed
Processing file 19 of 20: 068_10_0016_1000.wsp (Participant: 68100016, Visit: 1000)
Successfully processed
Processing file 20 of 20: 068_10_0026_1000.wsp (Participant: 68100026, Visit: 1000)
Successfully processed
Saved batch results to gating_results_batch_11_to_20.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 3 of 9 ====
Files 21 to 30 of 84 
Processing file 21 of 30: 068_10_0026_1100.wsp (Participant: 68100026, Visit: 1100)
Successfully processed
Processing file 22 of 30: 068_10_0026_V6_1500.wsp (Participant: 68100026, Visit: 1500)
Successfully processed
Processing file 23 of 30: 068_10_0026_V9_2120.wsp (Participant: 68100026, Visit: 2120)
Successfully processed
Processing file 24 of 30: 068_10_0029_1000.wsp (Participant: 68100029, Visit: 1000)
Successfully processed
Processing file 25 of 30: 068_10_0029_1100.wsp (Participant: 68100029, Visit: 1100)
Successfully processed
Processing file 26 of 30: 068_10_0029-1500.wsp (Participant: 68100029, Visit: 1500)
Successfully processed
Processing file 27 of 30: 068_10_0029_V9_2120.wsp (Participant: 68100029, Visit: 2120)
Successfully processed
Processing file 28 of 30: 068_10_0036_1000.wsp (Participant: 68100036, Visit: 1000)
Successfully processed
Processing file 29 of 30: 068_10_0036_1100.wsp (Participant: 68100036, Visit: 1100)
Successfully processed
Processing file 30 of 30: 068_10_0036_V6_1500.wsp (Participant: 68100036, Visit: 1500)
Successfully processed
Saved batch results to gating_results_batch_21_to_30.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 4 of 9 ====
Files 31 to 40 of 84 
Processing file 31 of 40: 068_10_0036_V9_2120.wsp (Participant: 68100036, Visit: 2120)
Successfully processed
Processing file 32 of 40: 068_10_0040_1000.wsp (Participant: 68100040, Visit: 1000)
Successfully processed
Processing file 33 of 40: 068_10_0040_V2_1100.wsp (Participant: 68100040, Visit: 1100)
Successfully processed
Processing file 34 of 40: 068_10_0040_V6_1500.wsp (Participant: 68100040, Visit: 1500)
Successfully processed
Processing file 35 of 40: 068_10_0040_V9_2120.wsp (Participant: 68100040, Visit: 2120)
Successfully processed
Processing file 36 of 40: 068_10_0044_V1_1000.wsp (Participant: 68100044, Visit: 1000)
Successfully processed
Processing file 37 of 40: 068_10_0044_1100.wsp (Participant: 68100044, Visit: 1100)
Successfully processed
Processing file 38 of 40: 068_10_0044_V6_1500.wsp (Participant: 68100044, Visit: 1500)
Successfully processed
Processing file 39 of 40: 068_10_0044_V9_2120.wsp (Participant: 68100044, Visit: 2120)
Successfully processed
Processing file 40 of 40: 068_10_0045_1000.wsp (Participant: 68100045, Visit: 1000)
Successfully processed
Saved batch results to gating_results_batch_31_to_40.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 5 of 9 ====
Files 41 to 50 of 84 
Processing file 41 of 50: 068_10_0045_V2_1100.wsp (Participant: 68100045, Visit: 1100)
Successfully processed
Processing file 42 of 50: 068_10_0046_V1_1000.wsp (Participant: 68100046, Visit: 1000)
Successfully processed
Processing file 43 of 50: 068_10_0046_1100.wsp (Participant: 68100046, Visit: 1100)
Successfully processed
Processing file 44 of 50: 068_10_0046_V6_1500.wsp (Participant: 68100046, Visit: 1500)
Successfully processed
Processing file 45 of 50: 068_10_0046_V9_2120.wsp (Participant: 68100046, Visit: 2120)
Successfully processed
Processing file 46 of 50: 068_10_0049_V1_1000.wsp (Participant: 68100049, Visit: 1000)
Successfully processed
Processing file 47 of 50: 068_10_0049_V2_1100.wsp (Participant: 68100049, Visit: 1100)
Successfully processed
Processing file 48 of 50: 068_10_0049_V6_1500.wsp (Participant: 68100049, Visit: 1500)
Successfully processed
Processing file 49 of 50: 068_10_0049_V9_2120.wsp (Participant: 68100049, Visit: 2120)
Successfully processed
Processing file 50 of 50: 068_10_0050_V1_1000.wsp (Participant: 68100050, Visit: 1000)
Successfully processed
Saved batch results to gating_results_batch_41_to_50.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 6 of 9 ====
Files 51 to 60 of 84 
Processing file 51 of 60: 068_10_0050_V2_1100.wsp (Participant: 68100050, Visit: 1100)
Successfully processed
Processing file 52 of 60: 068_10_0050_V7_1700.wsp (Participant: 68100050, Visit: 1700)
Successfully processed
Processing file 53 of 60: 068_10_0052_V1_1000.wsp (Participant: 68100052, Visit: 1000)
Successfully processed
Processing file 54 of 60: 068_10_0052_V2_1100.wsp (Participant: 68100052, Visit: 1100)
Successfully processed
Processing file 55 of 60: 068_10_0052_V6_1500.wsp (Participant: 68100052, Visit: 1500)
Successfully processed
Processing file 56 of 60: 068_10_0052_V9_2120.wsp (Participant: 68100052, Visit: 2120)
Successfully processed
Processing file 57 of 60: 068_10_0053_V1_1000.wsp (Participant: 68100053, Visit: 1000)
Successfully processed
Processing file 58 of 60: 068_10_0053_V6_1500.wsp (Participant: 68100053, Visit: 1500)
Successfully processed
Processing file 59 of 60: 068_10_0053_V9_2120.wsp (Participant: 68100053, Visit: 2120)
Successfully processed
Processing file 60 of 60: 068_10_0054_V1_1000.wsp (Participant: 68100054, Visit: 1000)
Successfully processed
Saved batch results to gating_results_batch_51_to_60.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 7 of 9 ====
Files 61 to 70 of 84 
Processing file 61 of 70: 068_10_0054_V2_1100.wsp (Participant: 68100054, Visit: 1100)
Successfully processed
Processing file 62 of 70: 068_10_0054_V6_1500.wsp (Participant: 68100054, Visit: 1500)
Successfully processed
Processing file 63 of 70: 068_10_0054_V9_2120.wsp (Participant: 68100054, Visit: 2120)
Successfully processed
Processing file 64 of 70: 068_10_0056_1000.wsp (Participant: 68100056, Visit: 1000)
Successfully processed
Processing file 65 of 70: 068_10_0058_V1_1000.wsp (Participant: 68100058, Visit: 1000)
Successfully processed
Processing file 66 of 70: 068_10_0058_V2_1100.wsp (Participant: 68100058, Visit: 1100)
Sample groups data structure: data.frame 
Sample groups data columns: groupName, groupID, sampleID 
First few rows of sample groups data:
    groupName groupID sampleID
1 All Samples       0        1
2 All Samples       0        2
3 All Samples       0        3
4 All Samples       0        4
5 All Samples       0        5
6 All Samples       0        6
Available sample groups in 068_10_0058_V2_1100.wsp : All Samples, Compensation, Samples 
Approach 1: Using 'Samples' group with default settings
Approach 1 succeeded!
Extraction succeeded! Getting population statistics...
Successfully processed
Processing file 67 of 70: 068_10_0058_V6_1500.wsp (Participant: 68100058, Visit: 1500)
Successfully processed
Processing file 68 of 70: 068_10_0058_V9_2120.wsp (Participant: 68100058, Visit: 2120)
Successfully processed
Processing file 69 of 70: 068_10_0060_V1_1000.wsp (Participant: 68100060, Visit: 1000)
Successfully processed
Processing file 70 of 70: 068_10_0060_V2_1100.wsp (Participant: 68100060, Visit: 1100)
Successfully processed
Saved batch results to gating_results_batch_61_to_70.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 8 of 9 ====
Files 71 to 80 of 84 
Processing file 71 of 80: 068_10_0060_V6_1500.wsp (Participant: 68100060, Visit: 1500)
Successfully processed
Processing file 72 of 80: 068_10_0060_V9_2120.wsp (Participant: 68100060, Visit: 2120)
Successfully processed
Processing file 73 of 80: 068_10_0061_V1_1000.wsp (Participant: 68100061, Visit: 1000)
Successfully processed
Processing file 74 of 80: 068_10_0061_V2_1100.wsp (Participant: 68100061, Visit: 1100)
Successfully processed
Processing file 75 of 80: 068_10_0061_V6_1500.wsp (Participant: 68100061, Visit: 1500)
Successfully processed
Processing file 76 of 80: 068_10_0061_V9_2120.wsp (Participant: 68100061, Visit: 2120)
Sample groups data structure: data.frame 
Sample groups data columns: groupName, groupID, sampleID 
First few rows of sample groups data:
    groupName groupID sampleID
1 All Samples       0        1
Available sample groups in 068_10_0061_V9_2120.wsp : All Samples 
FCS files in workspace (list format): (non-character data), 02_07_2025_MGH_Cyto_068_10_0061_V9_007.fcs, (non-character data), (non-character data) 
Using 'All Samples' group (index 1 )
Original FCS file channels: FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W ...
Total channels in original FCS file: 21 
Approach 1: Basic attempt with original file
Approach 1 error: colname not found: Comp-BV650-A 
Approach 2: Creating a modified FCS file with dummy channels
Adding dummy channel: BV650-A 
Adding dummy channel: Comp-BV650-A 
Modified FCS file created with channels: FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W ...
Total channels in modified FCS file: 23 
Approach 2 succeeded!
Extraction succeeded! Getting population statistics...
Successfully processed
Processing file 77 of 80: 068_10_0062_V1_1000.wsp (Participant: 68100062, Visit: 1000)
Successfully processed
Processing file 78 of 80: 068_10_0062_V2_1100.wsp (Participant: 68100062, Visit: 1100)
Successfully processed
Processing file 79 of 80: 068_10_0062_V6_1500.wsp (Participant: 68100062, Visit: 1500)
Successfully processed
Processing file 80 of 80: 068_10_0062_V9_2120.wsp (Participant: 68100062, Visit: 2120)
Sample groups data structure: data.frame 
Sample groups data columns: groupName, groupID, sampleID 
First few rows of sample groups data:
    groupName groupID sampleID
1 All Samples       0        1
Available sample groups in 068_10_0062_V9_2120.wsp : All Samples 
FCS files in workspace (list format): (non-character data), 02_07_2025_MGH_Cyto_068_10_0062_V9_008.fcs, (non-character data), (non-character data) 
Using 'All Samples' group (index 1 )
Original FCS file channels: FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W ...
Total channels in original FCS file: 21 
Approach 1: Basic attempt with original file
Approach 1 error: colname not found: Comp-BV650-A 
Approach 2: Creating a modified FCS file with dummy channels
Adding dummy channel: BV650-A 
Adding dummy channel: Comp-BV650-A 
Modified FCS file created with channels: FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W ...
Total channels in modified FCS file: 23 
Approach 2 succeeded!
Extraction succeeded! Getting population statistics...
Successfully processed
Saved batch results to gating_results_batch_71_to_80.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 9 of 9 ====
Files 81 to 84 of 84 
Processing file 81 of 84: 068_10_0063_V1_1000.wsp (Participant: 68100063, Visit: 1000)
Successfully processed
Processing file 82 of 84: 068_10_0063_V2_1100.wsp (Participant: 68100063, Visit: 1100)
Successfully processed
Processing file 83 of 84: 068_10_0063_V6_1500.wsp (Participant: 68100063, Visit: 1500)
Successfully processed
Processing file 84 of 84: 068_10_0063_V9_2120.wsp (Participant: 68100063, Visit: 2120)
Sample groups data structure: data.frame 
Sample groups data columns: groupName, groupID, sampleID 
First few rows of sample groups data:
    groupName groupID sampleID
1 All Samples       0        1
Available sample groups in 068_10_0063_V9_2120.wsp : All Samples 
FCS files in workspace (list format): (non-character data), 02_19_2025_MGH_Cyto_068_10_0063_V9.fcs, (non-character data), (non-character data) 
Using 'All Samples' group (index 1 )
Original FCS file channels: FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W ...
Total channels in original FCS file: 22 
Approach 1: Basic attempt with original file
Approach 1 succeeded!
Extraction succeeded! Getting population statistics...
Successfully processed
Saved batch results to gating_results_batch_81_to_84.RData 
Updated overall results file: gating_results_all.RData

CAP Flow data

Step 1: Creating the file inventory

Code
# Input folder with the CAP files and output folder for CAP step 01
base_dir <- str_c(flow_dir, "reorganized_south_africa_files/")
out_dir <- str_c(output_dir, "CAP/step 01")

# Make sure the output folder exists (parent folders included)
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}

# Recursively list every file ending in ".wsp" below base_dir, as full paths
all_wsp_files <- list.files(
  path = base_dir,
  pattern = "\\.wsp$",
  full.names = TRUE,
  recursive = TRUE
)
cat("Total WSP files found:", length(all_wsp_files), "\n")
Total WSP files found: 272 
Code
# Keep only the WSP files whose full path contains "068200"
relevant_wsp_files <- grep("068200", all_wsp_files, value = TRUE)
cat("WSP files related to 068200 samples:", length(relevant_wsp_files), "\n")
WSP files related to 068200 samples: 272 
Code
# Pair every relevant WSP file with one FCS file from the same folder.
# valid_wsp_files and valid_fcs_files are filled in parallel: the k-th element
# of one corresponds to the k-th element of the other. WSP files without any
# FCS candidate are reported and left out of both vectors.
valid_wsp_files <- character(0)
valid_fcs_files <- character(0)

for (wsp_file in relevant_wsp_files) {
  wsp_dir <- dirname(wsp_file)
  
  # FCS files in the same directory whose name contains "068200"
  matching_fcs <- list.files(wsp_dir, pattern = "068200.*\\.fcs$", full.names = TRUE)
  
  if (length(matching_fcs) > 0) {
    # Take the first match (should typically be only one per folder)
    valid_wsp_files <- c(valid_wsp_files, wsp_file)
    valid_fcs_files <- c(valid_fcs_files, matching_fcs[1])
    
    cat("Found: WSP:", basename(wsp_file), "→ FCS:", basename(matching_fcs[1]), "\n")
  } else {
    # Fallback search.
    # NOTE(review): as a regular expression, this pattern matches the same
    # file names as the strict one above, so this branch looks unreachable in
    # practice. Confirm whether a looser pattern was intended.
    relaxed_fcs <- list.files(wsp_dir, pattern = ".*068200.*\\.fcs$", full.names = TRUE)
    
    if (length(relaxed_fcs) > 0) {
      # Among the fallback hits, prefer paths containing "068" or "Sample"
      sa_matches <- relaxed_fcs[grepl("068|Sample", relaxed_fcs)]
      if (length(sa_matches) > 0) {
        valid_wsp_files <- c(valid_wsp_files, wsp_file)
        # Fix: this used to append `mgh_matches[1]`, a name that is never
        # defined in this loop (apparently a leftover from the MGH version of
        # this chunk). The file reported in the message below is sa_matches[1].
        valid_fcs_files <- c(valid_fcs_files, sa_matches[1])
        
        cat("Found (relaxed): WSP:", basename(wsp_file), "→ FCS:", basename(sa_matches[1]), "\n")
      } else {
        # No preferred candidate: fall back to the first hit
        valid_wsp_files <- c(valid_wsp_files, wsp_file)
        valid_fcs_files <- c(valid_fcs_files, relaxed_fcs[1])
        
        cat("Found (very relaxed): WSP:", basename(wsp_file), "→ FCS:", basename(relaxed_fcs[1]), "\n")
      }
    } else {
      cat("No matching FCS found for:", basename(wsp_file), "in", wsp_dir, "\n")
    }
  }
}
Found: WSP: 068200020-1000.wsp → FCS: Samples_Cytobrush_2137387_068200020_1000_007.fcs 
Found: WSP: 068200008-1000.wsp → FCS: Samples_CYTO_2145791-068200008-1000_007.fcs 
Found: WSP: 068200023-1000.wsp → FCS: Samples_CYTO_2145780-068200023-1000_008.fcs 
Found: WSP: 068200008-1100.wsp → FCS: Samples_CYTO_2145791-068200008-1100_007.fcs 
Found: WSP: 068200023-1100.wsp → FCS: Samples_CYTO_2152241-068200023-1100_008.fcs 
Found: WSP: 068200028-1000.wsp → FCS: Samples_CYTO_2154415-068200028-1000_007.fcs 
Found: WSP: 068200028-1100.wsp → FCS: Samples_CYTO_068200028-1100_007.fcs 
Found: WSP: 068200044-1000.wsp → FCS: Samples_CYTO_2161840-068200044-1000_007.fcs 
Found: WSP: 068200044-1100.wsp → FCS: Samples_CYTO_2165959-068200044-1100_008.fcs 
Found: WSP: 068200062-1000.wsp → FCS: Samples_CYTO_2166082-068200062-1000_007.fcs 
Found: WSP: 068200020-1500.wsp → FCS: Samples_CYTO_068200020-1500_007.fcs 
Found: WSP: 068200023-1500.wsp → FCS: Samples_CYTO_068200023-1500_008.fcs 
Found: WSP: 068200050-1000.wsp → FCS: Samples_CYTO_068200050-1000_009.fcs 
Found: WSP: 068200008-1500.wsp → FCS: Samples_CYTO_068200008-1500_007.fcs 
Found: WSP: 068200058-1000.wsp → FCS: Samples_CYTO_068200058-1000_007.fcs 
Found: WSP: 068200050-1100.wsp → FCS: Samples_CYTO_2169427-068200050-1100_007.fcs 
Found: WSP: 068200062-1100.wsp → FCS: Samples_CYTO_2169441-068200062-1100_008.fcs 
Found: WSP: 068200061-1000.wsp → FCS: Samples_CYTO_2170405-068200061-1000_007.fcs 
Found: WSP: 068200070-1000.wsp → FCS: Samples_CYTO_2170950-068200070-1000_007.fcs 
Found: WSP: 068200058-1100.wsp → FCS: Samples_CYTO_2171465-068200058-1100_007.fcs 
Found: WSP: 068200028-1500.wsp → FCS: Samples_CYTO_2172311-068200028-1500_007.fcs 
Found: WSP: 068200085-1000.wsp → FCS: Samples_CYTO_2175253-068200085-1000_007.fcs 
Found: WSP: 068200088-1000.wsp → FCS: Samples_CYTO_2177201-068200088-1000_007.fcs 
Found: WSP: 068200085-1100.wsp → FCS: Samples_CYTO_068200085-1100_007.fcs 
Found: WSP: 068200087-1000.wsp → FCS: Samples_CYTO_2181731-068200087-1000_007.fcs 
Found: WSP: 068200088-1100.wsp → FCS: Samples_CYTO_2181721-068200088-1100_008.fcs 
Found: WSP: 068200044-1500.wsp → FCS: Samples_CYTO_068200044-1500_007.fcs 
Found: WSP: 068200087-1100.wsp → FCS: Samples_CYTO_068200087-1100_007.fcs 
Found: WSP: 068200050-1500.wsp → FCS: Samples_CYTO_068200050-1500_007.fcs 
Found: WSP: 068200118-1000.wsp → FCS: Samples_CYTO_068200118-1000_008.fcs 
Found: WSP: 068200058-1500.wsp → FCS: Samples_CYTO_068200058-1500_007.fcs 
Found: WSP: 068200118-1100.wsp → FCS: Samples_CYTO_068200118-1100_008.fcs 
Found: WSP: 068200061-1500.wsp → FCS: Samples_CYTO_068200061-1500_007.fcs 
Found: WSP: 068200062-1511.wsp → FCS: Samples_CYTO_068200062-1511_008.fcs 
Found: WSP: 068200020-2120.wsp → FCS: Samples_CYTO_068200020-2120_007.fcs 
Found: WSP: 068200127-1000.wsp → FCS: Samples_CYTO_068200127-1000_008.fcs 
Found: WSP: 068200008-2120.wsp → FCS: Samples_CYTO_068200008-2120_007.fcs 
Found: WSP: 068200150-1000.wsp → FCS: Samples_CYTO_068200150-1000_008.fcs 
Found: WSP: 068200023-2120.wsp → FCS: Samples_CYTO_068200023-2120_007.fcs 
Found: WSP: 068200127-1100.wsp → FCS: Samples_CYTO_068200127-1100_007.fcs 
Found: WSP: 068200085-1500.wsp → FCS: Samples_CYTO_068200085-1500_007.fcs 
Found: WSP: 068200088-1500.wsp → FCS: Samples_CYTO_068200088-1500_008.fcs 
Found: WSP: 068200131-1000.wsp → FCS: Samples_CYTO_068200131-1000_009.fcs 
Found: WSP: 068200153-1000.wsp → FCS: Samples_CYTO_068200153-1000_007.fcs 
Found: WSP: 068200028-2120.wsp → FCS: Samples_CYTO_068200028-2120_007.fcs 
Found: WSP: 068200131-1100.wsp → FCS: Samples_CYTO_068200131-1100_008.fcs 
Found: WSP: 068200153-1100.wsp → FCS: Samples_CYTO_068200153-1100_007.fcs 
Found: WSP: 068200118-1500.wsp → FCS: Samples_SA-CYTO_068200118-1500_007.fcs 
Found: WSP: 068200087-1511.wsp → FCS: Samples_CYTO_068200087-1511_007.fcs 
Found: WSP: 068200044-2120.wsp → FCS: Samples_CYTO_068200044-2120_007.fcs 
Found: WSP: 068200191-1000.wsp → FCS: Samples_CYTO_068200191-1000_008.fcs 
Found: WSP: 068200204-1000.wsp → FCS: Samples_CYTO_068200204-1000_007.fcs 
Found: WSP: 068200127-1500.wsp → FCS: Samples_SA-CYTO_068200127-1500_007.fcs 
Found: WSP: 068200050-2120.wsp → FCS: Samples_CYTO_068200050-2120_007.fcs 
Found: WSP: 068200214-1000.wsp → FCS: Samples_CYTO_068200214-1000_007.fcs 
Found: WSP: 068200058-2120.wsp → FCS: Samples_CYTO_068200058-2120_009.fcs 
Found: WSP: 068200150-1500.wsp → FCS: Samples_CYTO_068200150-1500_008.fcs 
Found: WSP: 068200204-1100.wsp → FCS: Samples_CYTO_068200204-1100_007.fcs 
Found: WSP: 068200214-1100.wsp → FCS: Samples_CYTO_068200214-1100_007.fcs 
Found: WSP: 068200131-1500.wsp → FCS: Samples_CYTO_068200131-1500_007.fcs 
Found: WSP: 068200179-1000.wsp → FCS: Samples_CYTO_068200179-1000_008.fcs 
Found: WSP: 068200061-2120.wsp → FCS: Samples_CYTO_068200061-2120_007.fcs 
Found: WSP: 068200153-1500.wsp → FCS: Samples_CYTO_068200153-1500_007.fcs 
Found: WSP: 068200179-1100.wsp → FCS: Samples_CYTO_068200179-1100_008.fcs 
Found: WSP: 068200085-2120.wsp → FCS: Samples_CYTO_068200085-2120_008.fcs 
Found: WSP: 068200224-1000.wsp → FCS: Samples_CYTO_068200224-1000_007.fcs 
Found: WSP: 068200088-2120.wsp → FCS: Samples_CYTO_068200088-2120_007.fcs 
Found: WSP: 068200224-1100.wsp → FCS: Samples_CYTO_068200224-1100_007.fcs 
Found: WSP: 068200087-2120.wsp → FCS: Samples_CYTO_068200087-2120_008.fcs 
Found: WSP: 068200191-1500.wsp → FCS: Samples_CYTO_068200191-1500_007.fcs 
Found: WSP: 068200225-1000.wsp → FCS: Samples_CYTO_068200225-1000_007.fcs 
Found: WSP: 068200239-1000.wsp → FCS: Samples_CYTO_068200239-1000_008.fcs 
Found: WSP: 068200204-1500.wsp → FCS: Samples_CYTO_068200204-1500_007.fcs 
Found: WSP: 068200118-2120.wsp → FCS: Samples_CYTO_068200118-2120_008.fcs 
Found: WSP: 068200225-1100.wsp → FCS: Samples_CYTO_068200225-1100_007.fcs 
Found: WSP: 068200214-1500.wsp → FCS: Samples_CYTO_068200214-1500_008.fcs 
Found: WSP: 068200229-1000.wsp → FCS: Samples_CYTO_068200229-1000_009.fcs 
Found: WSP: 068200247-1000.wsp → FCS: Samples_CYTO_068200247-1000_007.fcs 
Found: WSP: 068200240-1000_July.wsp → FCS: Samples_CYTO_068200240-1000_007.fcs 
Found: WSP: 068200248-1000_July.wsp → FCS: Samples_CYTO_068200248-1000_008.fcs 
Found: WSP: 068200179-1500.wsp → FCS: Samples_CYTO_068200179-1500_007.fcs 
Found: WSP: 068200239-1200.wsp → FCS: Samples_CYTO_068200239-1200_007.fcs 
Found: WSP: 068200229-1100.wsp → FCS: Samples_CYTO_068200229-1100_007.fcs 
Found: WSP: 068200248-1100.wsp → FCS: Samples_CYTO_068200248-1100_008.fcs 
Found: WSP: 068200253-1000.wsp → FCS: Samples_CYTO_068200253-1000_007.fcs 
Found: WSP: 068200240-1100.wsp → FCS: Samples_CYTO_068200240-1100_007.fcs 
Found: WSP: 068200127-2120.wsp → FCS: Samples_CYTO_068200127-2120_007.fcs 
Found: WSP: 068200150-2120.wsp → FCS: Samples_CYTO_068200150-2120_008.fcs 
Found: WSP: 068200252-1100.wsp → FCS: Samples_CYTO_068200252-1100_007.fcs 
Found: WSP: 068200224-1500.wsp → FCS: Samples_CYTO_068200224-1500_007.fcs 
Found: WSP: 068200266-1000.wsp → FCS: Samples_CYTO_068200266-1000_007.fcs 
Found: WSP: 068200131-2120.wsp → FCS: Samples_CYTO_068200131-2120_007.fcs 
Found: WSP: 068200153-2120.wsp → FCS: Samples_CYTO_068200153-2120_008.fcs 
Found: WSP: 068200282-1000.wsp → FCS: Samples_CYTO_068200282-1000_007.fcs 
Found: WSP: 068200285-1000.wsp → FCS: Samples_CYTO_068200285-1000_008.fcs 
Found: WSP: 068200225-1500.wsp → FCS: Samples_CYTO_068200225-1500_008.fcs 
Found: WSP: 068200239-1500.wsp → FCS: Samples_CYTO_068200239-1500_007.fcs 
Found: WSP: 068200282-1100.wsp → FCS: Samples_CYTO_068200282-1100_007.fcs 
Found: WSP: 068200285-1100.wsp → FCS: Samples_CYTO_068200285-1100_008.fcs 
Found: WSP: 068200229-1500.wsp → FCS: Samples_CYTO_068200229-1500_008.fcs 
Found: WSP: 068200310-1000.wsp → FCS: Samples_CYTO_068200310-1000_007.fcs 
Found: WSP: 068200191-2120.wsp → FCS: Samples_CYTO_068200191-2120_009.fcs 
Found: WSP: 068200240-1500.wsp → FCS: Samples_CYTO_068200240-1500_007.fcs 
Found: WSP: 068200248-1500.wsp → FCS: Samples_CYTO_068200248-1500_008.fcs 
Found: WSP: 068200204-2120.wsp → FCS: Samples_CYTO_068200204-2120_008.fcs 
Found: WSP: 068200252-1500.wsp → FCS: Samples_CYTO_068200252-1500_007.fcs 
Found: WSP: 068200253-1500.wsp → FCS: Samples_CYTO_068200253-1500_008.fcs 
Found: WSP: 068200281-1000.wsp → FCS: Samples_CYTO_068200281-1000_007.fcs 
Found: WSP: 068200214-2120.wsp → FCS: Samples_CYTO_068200214-2120_007.fcs 
Found: WSP: 068200310-1200.wsp → FCS: Samples_CYTO_068200310-1200_007.fcs 
Found: WSP: 068200281-1100.wsp → FCS: Samples_CYTO_068200281-1100_007.fcs 
Found: WSP: 068200179-2120.wsp → FCS: Samples_CYTO_068200179-2120_007.fcs 
Found: WSP: 068200347-1000.wsp → FCS: Samples_CYTO_068200347-1000_007.fcs 
Found: WSP: 068200350-1000.wsp → FCS: Samples_CYTO_068200350-1000_007.fcs 
Found: WSP: 068200364-1000.wsp → FCS: Samples_CYTO_068200364-1000_008.fcs 
Found: WSP: 068200357-1000.wsp → FCS: Samples_CYTO_068200357-1000_008.fcs 
Found: WSP: 068200363-1000.wsp → FCS: Samples_CYTO_068200363-1000_007.fcs 
Found: WSP: 068200366-1000.wsp → FCS: Samples_CYTO_068200366-1000_009.fcs 
Found: WSP: 068200282-1500.wsp → FCS: Samples_CYTO_068200282-1500_007.fcs 
Found: WSP: 068200285-1500.wsp → FCS: Samples_CYTO_068200285-1500_008.fcs 
Found: WSP: 068200347-1100.wsp → FCS: Samples_CYTO_068200347-1100_007.fcs 
Found: WSP: 068200350-1100.wsp → FCS: Samples_CYTO_068200350-1100_008.fcs 
Found: WSP: 068200357-1100.wsp → FCS: Samples_CYTO_068200357-1100_013.fcs 
Found: WSP: 068200361-1000.wsp → FCS: Samples_CYTO_068200361-1000_012.fcs 
Found: WSP: 068200363-1100.wsp → FCS: Samples_CYTO_068200363-1100_007.fcs 
Found: WSP: 068200366-1100.wsp → FCS: Samples_CYTO_068200366-1100_010.fcs 
Found: WSP: 068200385-1000.wsp → FCS: Samples_CYTO_068200385-1000_011.fcs 
Found: WSP: 068200387-1000.wsp → FCS: Samples_CYTO_068200387-1000_008.fcs 
Found: WSP: 068200392-1000.wsp → FCS: Samples_CYTO_068200392-1000_009.fcs 
Found: WSP: 068200278-1000.wsp → FCS: Samples_CYTO_068200278-1000_008.fcs 
Found: WSP: 068200398-1000.wsp → FCS: Samples_CYTO_068200398-1000_007.fcs 
Found: WSP: 068200310-1500.wsp → FCS: Samples_CYTO_068200310-1500_008.fcs 
Found: WSP: 068200358-1000.wsp → FCS: Samples_CYTO_068200358-1000_009.fcs 
Found: WSP: 068200421-1000.wsp → FCS: Samples_CYTO_068200421-1000_007.fcs 
Found: WSP: 068200356-1000.wsp → FCS: Samples_CYTO_068200356-1000_011.fcs 
Found: WSP: 068200378-1000.wsp → FCS: Samples_CYTO_068200378-1000_010.fcs 
Found: WSP: 068200380-1000.wsp → FCS: Samples_CYTO_068200380-1000_009.fcs 
Found: WSP: 068200399-1000.wsp → FCS: Samples_CYTO_068200399-1000_008.fcs 
Found: WSP: 068200422-1000.wsp → FCS: Samples_CYTO_068200422-1000_007.fcs 
Found: WSP: 068200364-1200.wsp → FCS: Samples_CYTO_068200364-1200_008.fcs 
Found: WSP: 068200379-1000.wsp → FCS: Samples_CYTO_068200379-1000_010.fcs 
Found: WSP: 068200385-1100.wsp → FCS: Samples_CYTO_068200385-1100_007.fcs 
Found: WSP: 068200387-1100.wsp → FCS: Samples_CYTO_068200387-1100_009.fcs 
Found: WSP: 068200392-1100.wsp → FCS: Samples_CYTO_068200392-1100_011.fcs 
Found: WSP: 068200370-1000.wsp → FCS: Samples_CYTO_068200370-1000_007.fcs 
Found: WSP: 068200373-1000.wsp → FCS: Samples_CYTO_068200373-1000_008.fcs 
Found: WSP: 068200375-1000.wsp → FCS: Samples_CYTO_068200375-1000_009.fcs 
Found: WSP: 068200278-1100.wsp → FCS: Samples_CYTO_068200278-1100_007.fcs 
Found: WSP: 068200356-1100.wsp → FCS: Samples_CYTO_068200356-1100_008.fcs 
Found: WSP: 068200388-1000.wsp → FCS: Samples_CYTO_068200388-1000_010.fcs 
Found: WSP: 068200398-1100.wsp → FCS: Samples_CYTO_068200398-1100_009.fcs 
Found: WSP: 068200409-1000.wsp → FCS: Samples_CYTO_068200409-1000_012.fcs 
Found: WSP: 068200415-1000.wsp → FCS: Samples_CYTO_068200415-1000_011.fcs 
Found: WSP: 068200378-1100.wsp → FCS: Samples_CYTO_068200378-1100_007.fcs 
Found: WSP: 068200421-1100.wsp → FCS: Samples_CYTO_068200421-1100_008.fcs 
Found: WSP: 068200399-1100.wsp → FCS: Samples_CYTO_068200399-1100_010.fcs 
Found: WSP: 068200412-1000.wsp → FCS: Samples_CYTO_068200412-1000_007.fcs 
Found: WSP: 068200414-1000.wsp → FCS: Samples_CYTO_068200414-1000_009.fcs 
Found: WSP: 068200422-1100.wsp → FCS: Samples_CYTO_068200422-1100_008.fcs 
Found: WSP: 068200441-1000.wsp → FCS: Samples_CYTO_068200441-1000_011.fcs 
Found: WSP: 068200380-1100.wsp → FCS: Samples_CYTO_068200380-1100_009.fcs 
Found: WSP: 068200397-1000.wsp → FCS: Samples_CYTO_068200397-1000_008.fcs 
Found: WSP: 068200438-1000.wsp → FCS: Samples_CYTO_068200438-1000_007.fcs 
Found: WSP: 068200281-1500.wsp → FCS: Samples_CYTO_068200281-1500_010.fcs 
Found: WSP: 068200361-1200.wsp → FCS: Samples_CYTO_068200361-1200_007.fcs 
Found: WSP: 068200373-1100.wsp → FCS: Samples_CYTO_068200373-1100_009.fcs 
Found: WSP: 068200375-1100.wsp → FCS: Samples_CYTO_068200375-1100_008.fcs 
Found: WSP: 068200388-1100.wsp → FCS: Samples_CYTO_068200388-1100_009.fcs 
Found: WSP: 068200407-1000.wsp → FCS: Samples_CYTO_068200407-1000_007.fcs 
Found: WSP: 068200409-1100.wsp → FCS: Samples_CYTO_068200409-1100_010.fcs 
Found: WSP: 068200415-1100.wsp → FCS: Samples_CYTO_068200415-1100_008.fcs 
Found: WSP: 068200225-2120.wsp → FCS: Samples_CYTO_068200225-2120_012.fcs 
Found: WSP: 068200229-2120.wsp → FCS: Samples_CYTO_068200229-2120_010.fcs 
Found: WSP: 068200239-2120.wsp → FCS: Samples_CYTO_068200239-2120_009.fcs 
Found: WSP: 068200247-2120.wsp → FCS: Samples_CYTO_068200247-2120_011.fcs 
Found: WSP: 068200425-1000.wsp → FCS: Samples_CYTO_068200425-1000_008.fcs 
Found: WSP: 068200439-1000.wsp → FCS: Samples_CYTO_068200439-1000_007.fcs 
Found: WSP: 068200358-1200.wsp → FCS: Samples_CYTO_068200358-1200_011.fcs 
Found: WSP: 068200412-1100.wsp → FCS: Samples_CYTO_068200412-1100_010.fcs 
Found: WSP: 068200414-1100.wsp → FCS: Samples_CYTO_068200414-1100_009.fcs 
Found: WSP: 068200428-1000.wsp → FCS: Samples_CYTO_068200428-1000_007.fcs 
Found: WSP: 068200441-1100.wsp → FCS: Samples_CYTO_068200441-1100_008.fcs 
Found: WSP: 068200240-2120.wsp → FCS: Samples_CYTO_068200240-2120_008.fcs 
Found: WSP: 068200248-2120.wsp → FCS: Samples_CYTO_068200248-2120_009.fcs 
Found: WSP: 068200397-1100.wsp → FCS: Samples_CYTO_068200397-1100_007.fcs 
Found: WSP: 068200379-1200.wsp → FCS: Samples_CYTO_068200379-1200_007.fcs 
Found: WSP: 068200370-1200.wsp → FCS: Samples_CYTO_068200370-1200_009.fcs 
Found: WSP: 068200407-1100.wsp → FCS: Samples_CYTO_068200407-1100_008.fcs 
Found: WSP: 068200457-1000.wsp → FCS: Samples_CYTO_068200457-1000_007.fcs 
Found: WSP: 068200252-2120.wsp → FCS: Samples_CYTO_068200252-2120_009.fcs 
Found: WSP: 068200425-1100.wsp → FCS: Samples_CYTO_068200425-1100_007.fcs 
Found: WSP: 068200439-1100.wsp → FCS: Samples_CYTO_068200439-1100_008.fcs 
Found: WSP: 068200347-1500.wsp → FCS: Samples_CYTO_068200347-1500_008.fcs 
Found: WSP: 068200428-1100.wsp → FCS: Samples_CYTO_068200428-1100_007.fcs 
Found: WSP: 068200364-1500.wsp → FCS: Samples_CYTO_068200364-1500_007.fcs 
Found: WSP: 068200438-1200.wsp → FCS: Samples_CYTO_068200438-1200_007.fcs 
Found: WSP: 068200357-1500.wsp → FCS: Samples_CYTO_068200357-1500_007.fcs 
Found: WSP: 068200363-1500.wsp → FCS: Samples_CYTO_068200363-1500_009.fcs 
Found: WSP: 068200366-1500.wsp → FCS: Samples_CYTO_068200366-1500_008.fcs 
Found: WSP: 068200385-1500.wsp → FCS: Samples_CYTO_068200385-1500_007.fcs 
Found: WSP: 068200387-1500.wsp → FCS: Samples_CYTO_068200387-1500_008.fcs 
Found: WSP: 068200361-1500.wsp → FCS: Samples_CYTO_068200361-1500_008.fcs 
Found: WSP: 068200392-1500.wsp → FCS: Samples_CYTO_068200392-1500_007.fcs 
Found: WSP: 068200278-1500.wsp → FCS: Samples_CYTO_068200278-1500_007.fcs 
Found: WSP: 068200358-1500.wsp → FCS: Samples_CYTO_068200358-1500_009.fcs 
Found: WSP: 068200378-1500.wsp → FCS: Samples_CYTO_068200378-1500_008.fcs 
Found: WSP: 068200398-1500.wsp → FCS: Samples_CYTO_068200398-1500_010.fcs 
Found: WSP: 068200421-1500.wsp → FCS: Samples_CYTO_068200421-1500_007.fcs 
Found: WSP: 068200399-1500.wsp → FCS: Samples_CYTO_068200399-1500_008.fcs 
Found: WSP: 068200422-1500.wsp → FCS: Samples_CYTO_068200422-1500_007.fcs 
Found: WSP: 068200285-2120.wsp → FCS: Samples_CYTO_068200285-2120_010.fcs 
Found: WSP: 068200356-1500.wsp → FCS: Samples_CYTO_068200356-1500_007.fcs 
Found: WSP: 068200379-1500.wsp → FCS: Samples_CYTO_068200379-1500_008.fcs 
Found: WSP: 068200380-1500.wsp → FCS: Samples_CYTO_068200380-1500_009.fcs 
Found: WSP: 068200370-1500.wsp → FCS: Samples_CYTO_068200370-1500_007.fcs 
Found: WSP: 068200373-1500.wsp → FCS: Samples_CYTO_068200373-1500_008.fcs 
Found: WSP: 068200375-1500.wsp → FCS: Samples_CYTO_068200375-1500_009.fcs 
Found: WSP: 068200388-1500.wsp → FCS: Samples_CYTO_068200388-1500_010.fcs 
Found: WSP: 068200409-1500.wsp → FCS: Samples_CYTO_068200409-1500_008.fcs 
Found: WSP: 068200415-1500.wsp → FCS: Samples_CYTO_068200415-1500_007.fcs 
Found: WSP: 068200412-1500.wsp → FCS: Samples_CYTO_068200412-1500_007.fcs 
Found: WSP: 068200414-1500.wsp → FCS: Samples_CYTO_068200414-1500_009.fcs 
Found: WSP: 068200441-1500.wsp → FCS: Samples_CYTO_068200441-1500_008.fcs 
Found: WSP: 068200282-2120.wsp → FCS: Samples_CYTO_068200282-2120_008.fcs 
Found: WSP: 068200438-1500.wsp → FCS: Samples_CYTO_068200438-1500_007.fcs 
Found: WSP: 068200397-1500.wsp → FCS: Samples_CYTO_068200397-1500_007.fcs 
Found: WSP: 068200425-1500.wsp → FCS: Samples_CYTO_068200425-1500_007.fcs 
Found: WSP: 068200439-1500.wsp → FCS: Samples_CYTO_068200439-1500_007.fcs 
Found: WSP: 068200428-1500.wsp → FCS: Samples_CYTO_068200428-1500_007.fcs 
Found: WSP: 068200407-1500.wsp → FCS: Samples_CYTO_068200407-1500_007.fcs 
Found: WSP: 068200310-2120.wsp → FCS: Samples_CYTO_068200310-2120_007.fcs 
Found: WSP: 068200460-1000.wsp → FCS: Samples_CYTO_068200460-1000_007.fcs 
Found: WSP: 068200281-2120.wsp → FCS: Samples_CYTO_068200281-2120_009.fcs 
Found: WSP: 068200347-2120.wsp → FCS: Samples_CYTO_068200347-2120_007.fcs 
Found: WSP: 068200364-2120.wsp → FCS: Samples_CYTO_068200364-2120_008.fcs 
Found: WSP: 068200357-2120.wsp → FCS: Samples_CYTO_068200357-2120_008.fcs 
Found: WSP: 068200363-2120.wsp → FCS: Samples_CYTO_068200363-2120_009.fcs 
Found: WSP: 068200366-2120.wsp → FCS: Samples_CYTO_068200366-2120_007.fcs 
Found: WSP: 068200465-1000.wsp → FCS: Samples_CYTO_068200465-1000_007.fcs 
Found: WSP: 068200361-2120.wsp → FCS: Samples_CYTO_068200361-2120_008.fcs 
Found: WSP: 068200385-2120.wsp → FCS: Samples_CYTO_068200385-2120_007.fcs 
Found: WSP: 068200278-2120.wsp → FCS: Samples_CYTO_068200278-2120_011.fcs 
Found: WSP: 068200358-2120.wsp → FCS: Samples_CYTO_068200358-2120_009.fcs 
Found: WSP: 068200398-2120.wsp → FCS: Samples_CYTO_068200398-2120_008.fcs 
Found: WSP: 068200421-2120.wsp → FCS: Samples_CYTO_068200421-2120_010.fcs 
Found: WSP: 068200460-1100.wsp → FCS: Samples_CYTO_068200460-1100_007.fcs 
Found: WSP: 068200356-2120.wsp → FCS: Samples_CYTO_068200356-2120_007.fcs 
Found: WSP: 068200378-2120.wsp → FCS: Samples_CYTO_068200378-2120_009.fcs 
Found: WSP: 068200380-2120.wsp → FCS: Samples_CYTO_068200380-2120_008.fcs 
Found: WSP: 068200370-2120.wsp → FCS: Samples_CYTO_068200370-2120_007.fcs 
Found: WSP: 068200399-2120.wsp → FCS: Samples_CYTO_068200399-2120_008.fcs 
Found: WSP: 068200422-2120.wsp → FCS: Samples_CYTO_068200422-2120_009.fcs 
Found: WSP: 068200373_2120.wsp → FCS: Samples_CYTO_068200373-2120_008.fcs 
Found: WSP: 068200375_2120.wsp → FCS: Samples_CYTO_068200375-2120_007.fcs 
Found: WSP: 068200379_2120.wsp → FCS: Samples_CYTO_068200379-2120_009.fcs 
Found: WSP: 068200465-1200.wsp → FCS: Samples_CYTO_068200465-1200_007.fcs 
Found: WSP: 068200388-2120.wsp → FCS: Samples_CYTO_068200388-2120_007.fcs 
Found: WSP: 068200409-2120.wsp → FCS: Samples_CYTO_068200409-2120_008.fcs 
Found: WSP: 068200460-1500.wsp → FCS: Samples_CYTO_068200460-1500_007.fcs 
Found: WSP: 068200412-2120.wsp → FCS: Samples_CYTO_068200412-2120_007.fcs 
Found: WSP: 068200414-2120.wsp → FCS: Samples_CYTO_068200414-2120_008.fcs 
Found: WSP: 068200441-2120.wsp → FCS: Samples_CYTO_068200441-2120_008.fcs 
Found: WSP: 068200465-1500.wsp → FCS: Samples_CYTO_068200465-1500_007.fcs 
Found: WSP: 068200397-2120.wsp → FCS: Samples_CYTO_068200397-2120_009.fcs 
Found: WSP: 068200415-2120.wsp → FCS: Samples_CYTO_068200415-2120_008.fcs 
Found: WSP: 068200438-2120.wsp → FCS: Samples_CYTO_068200438-2120_007.fcs 
Found: WSP: 068200407-2120.wsp → FCS: Specimen_001_Cyto-068200407-2120.fcs 
Found: WSP: 068200425-2120.wsp → FCS: Specimen_001_Cyto-068200425-2120.fcs 
Found: WSP: 068200428-2120.wsp → FCS: Specimen_001_Cyto-068200428-2120.fcs 
Found: WSP: 068200439-2120.wsp → FCS: Specimen_001_Cyto-068200439-2120.fcs 
Found: WSP: 068200460-2120.wsp → FCS: Samples_CYTO_068200460-2120_007.fcs 
Found: WSP: 068200465-2120.wsp → FCS: Samples_CYTO_068200465-2120_007.fcs 
Code
# Assemble the sample inventory: one row per matched WSP/FCS pair, together
# with the folder that contains the WSP file.
sa_inventory <- data.frame(
  wsp_file = valid_wsp_files,
  fcs_file = valid_fcs_files,
  folder = dirname(valid_wsp_files),
  stringsAsFactors = FALSE
)

# Report the number of matched pairs
cat(
  "\n\nFound",
  nrow(sa_inventory),
  "valid samples with both WSP and FCS files\n"
)


Found 272 valid samples with both WSP and FCS files
Code
# Preview the top rows of the inventory; nothing is printed when it is empty
if (nrow(sa_inventory) >= 1) {
  print(head(sa_inventory, n = 6L))
}
                                                                                                                                                                                             wsp_file
1 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-09_068200020_1000/068200020-1000.wsp
2 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-15_068200008_1000/068200008-1000.wsp
3 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-15_068200023_1000/068200023-1000.wsp
4 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-22_068200008_1100/068200008-1100.wsp
5 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-22_068200023_1100/068200023-1100.wsp
6 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-27_068200028_1000/068200028-1000.wsp
                                                                                                                                                                                                                           fcs_file
1 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-09_068200020_1000/Samples_Cytobrush_2137387_068200020_1000_007.fcs
2      /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-15_068200008_1000/Samples_CYTO_2145791-068200008-1000_007.fcs
3      /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-15_068200023_1000/Samples_CYTO_2145780-068200023-1000_008.fcs
4      /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-22_068200008_1100/Samples_CYTO_2145791-068200008-1100_007.fcs
5      /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-22_068200023_1100/Samples_CYTO_2152241-068200023-1100_008.fcs
6      /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-27_068200028_1000/Samples_CYTO_2154415-068200028-1000_007.fcs
                                                                                                                                                                            folder
1 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-09_068200020_1000
2 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-15_068200008_1000
3 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-15_068200023_1000
4 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-22_068200008_1100
5 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-22_068200023_1100
6 /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files//samples/2024-02-27_068200028_1000
Code
# Persist the inventory table (CSV) and the two matched path vectors (RDS)
# in the output directory of this step.
write.csv(
  sa_inventory,
  file = file.path(out_dir, "sa_sample_inventory.csv"),
  row.names = FALSE
)
saveRDS(
  valid_wsp_files,
  file = file.path(out_dir, "sa_valid_wsp_files.rds")
)
saveRDS(
  valid_fcs_files,
  file = file.path(out_dir, "sa_valid_fcs_files.rds")
)

# Tell the reader where the CSV was written
cat(
  "\nInventory saved to:",
  file.path(out_dir, "sa_sample_inventory.csv"),
  "\n"
)

Inventory saved to: /Users/laurasymul/OneDrive - UCL/Academia/Research/VIBRANT data UCLouvain/actual data/01 Preprocessed and QCed/06 Flow cytometry tmp files/CAP/step 01/sa_sample_inventory.csv 
Code
# Report where the vector of WSP file paths was written
cat(
  "WSP file vector saved to:",
  file.path(out_dir, "sa_valid_wsp_files.rds"),
  "\n"
)
WSP file vector saved to: /Users/laurasymul/OneDrive - UCL/Academia/Research/VIBRANT data UCLouvain/actual data/01 Preprocessed and QCed/06 Flow cytometry tmp files/CAP/step 01/sa_valid_wsp_files.rds 
Code
# Report where the vector of FCS file paths was written
cat(
  "FCS file vector saved to:",
  file.path(out_dir, "sa_valid_fcs_files.rds"),
  "\n"
)
FCS file vector saved to: /Users/laurasymul/OneDrive - UCL/Academia/Research/VIBRANT data UCLouvain/actual data/01 Preprocessed and QCed/06 Flow cytometry tmp files/CAP/step 01/sa_valid_fcs_files.rds 
Code
# cat(" There are in fact 276 sample folders in the SA directory \n")
Code
# Drop the step-01 working objects so they cannot leak into the next steps
rm(
  list = c(
    "sa_inventory",
    "valid_wsp_files",
    "valid_fcs_files",
    "wsp_dir",
    "wsp_file",
    "all_wsp_files"
  )
)

Step 2: Verify file data

Code
# Input and output locations for the file verification step.
# sa_base_dir is built by plain string concatenation, so this assumes
# base_dir already ends with a path separator.
sa_base_dir <- str_c(base_dir, "samples")
out_dir <- str_c(output_dir, "CAP/step 02/")

# Create the output directory if it doesn't exist. recursive = TRUE also
# creates the intermediate "CAP/" level, so this chunk does not rely on an
# earlier step having created it (a non-recursive dir.create() would only
# warn and return FALSE in that case).
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}
Code
# Run the verification step on the sample folders; results go to out_dir
sa_inventory <- process_sa_samples(
  sa_base_dir,
  out_dir = out_dir
)
Processing South Africa samples from: /Users/laurasymul/Dropbox/Academia/Projects/VIBRANT Study Files/90_VIBRANT_consolidated_data/06 Flow cytometry/reorganized_south_africa_files/samples 
Found 272 folders
Created 272 inventory entries
Found 74 unique subject IDs
Inventory saved to sa_sample_inventory.csv

Detailed summary:
Folders with both WSP and FCS files: 272 
Folders with only WSP files: 0 
Folders with only FCS files: 0 
Folders with no files: 0 
No folders with multiple WSP or FCS files found
there
Cleaned inventory contains 272 entries
Cleaned inventory saved to sa_sample_inventory_cleaned.csv

Step 3: Extracting gating structure

Code
# Output directory for the gating extraction step
out_dir <- str_c(output_dir, "CAP/step 03/")

# Create the output directory if it doesn't exist. recursive = TRUE also
# creates the intermediate "CAP/" level, so this chunk can run on its own
# (a non-recursive dir.create() would only warn and return FALSE if the
# parent were missing).
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}
Code
# Extract the gating results for every entry of the cleaned step-02 inventory.
# Workflow: first run with test_mode = TRUE to check a few files; once that
# looks good, switch to test_mode = FALSE (as here) for the full run.
results <- process_inventory(
  inventory_file = str_c(
    output_dir,
    "CAP/step 02/sa_sample_inventory_cleaned.csv"
  ),
  out_dir = out_dir,
  batch_size = 10,
  test_mode = FALSE
)

==== Processing Batch 1 of 28 ====
Files 1 to 10 of 272 
Processing file 1 of 10: 068200020-1000.wsp (Participant: 68200020, Visit: 1000)
Successfully processed
Processing file 2 of 10: 068200008-1000.wsp (Participant: 68200008, Visit: 1000)
Successfully processed
Processing file 3 of 10: 068200023-1000.wsp (Participant: 68200023, Visit: 1000)
Successfully processed
Processing file 4 of 10: 068200008-1100.wsp (Participant: 68200008, Visit: 1100)
Successfully processed
Processing file 5 of 10: 068200023-1100.wsp (Participant: 68200023, Visit: 1100)
Successfully processed
Processing file 6 of 10: 068200028-1000.wsp (Participant: 68200028, Visit: 1000)
Successfully processed
Processing file 7 of 10: 068200028-1100.wsp (Participant: 68200028, Visit: 1100)
Successfully processed
Processing file 8 of 10: 068200044-1000.wsp (Participant: 68200044, Visit: 1000)
Successfully processed
Processing file 9 of 10: 068200044-1100.wsp (Participant: 68200044, Visit: 1100)
Successfully processed
Processing file 10 of 10: 068200062-1000.wsp (Participant: 68200062, Visit: 1000)
Successfully processed
Saved batch results to gating_results_batch_1_to_10.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 2 of 28 ====
Files 11 to 20 of 272 
Processing file 11 of 20: 068200020-1500.wsp (Participant: 68200020, Visit: 1500)
Successfully processed
Processing file 12 of 20: 068200023-1500.wsp (Participant: 68200023, Visit: 1500)
Successfully processed
Processing file 13 of 20: 068200050-1000.wsp (Participant: 68200050, Visit: 1000)
Successfully processed
Processing file 14 of 20: 068200008-1500.wsp (Participant: 68200008, Visit: 1500)
Successfully processed
Processing file 15 of 20: 068200058-1000.wsp (Participant: 68200058, Visit: 1000)
Successfully processed
Processing file 16 of 20: 068200050-1100.wsp (Participant: 68200050, Visit: 1100)
Successfully processed
Processing file 17 of 20: 068200062-1100.wsp (Participant: 68200062, Visit: 1100)
Successfully processed
Processing file 18 of 20: 068200061-1000.wsp (Participant: 68200061, Visit: 1000)
Successfully processed
Processing file 19 of 20: 068200070-1000.wsp (Participant: 68200070, Visit: 1000)
Successfully processed
Processing file 20 of 20: 068200058-1100.wsp (Participant: 68200058, Visit: 1100)
Successfully processed
Saved batch results to gating_results_batch_11_to_20.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 3 of 28 ====
Files 21 to 30 of 272 
Processing file 21 of 30: 068200028-1500.wsp (Participant: 68200028, Visit: 1500)
Successfully processed
Processing file 22 of 30: 068200085-1000.wsp (Participant: 68200085, Visit: 1000)
Successfully processed
Processing file 23 of 30: 068200088-1000.wsp (Participant: 68200088, Visit: 1000)
Successfully processed
Processing file 24 of 30: 068200085-1100.wsp (Participant: 68200085, Visit: 1100)
Successfully processed
Processing file 25 of 30: 068200087-1000.wsp (Participant: 68200087, Visit: 1000)
Successfully processed
Processing file 26 of 30: 068200088-1100.wsp (Participant: 68200088, Visit: 1100)
Successfully processed
Processing file 27 of 30: 068200044-1500.wsp (Participant: 68200044, Visit: 1500)
Successfully processed
Processing file 28 of 30: 068200087-1100.wsp (Participant: 68200087, Visit: 1100)
Successfully processed
Processing file 29 of 30: 068200050-1500.wsp (Participant: 68200050, Visit: 1500)
Successfully processed
Processing file 30 of 30: 068200118-1000.wsp (Participant: 68200118, Visit: 1000)
Successfully processed
Saved batch results to gating_results_batch_21_to_30.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 4 of 28 ====
Files 31 to 40 of 272 
Processing file 31 of 40: 068200058-1500.wsp (Participant: 68200058, Visit: 1500)
Successfully processed
Processing file 32 of 40: 068200118-1100.wsp (Participant: 68200118, Visit: 1100)
Successfully processed
Processing file 33 of 40: 068200061-1500.wsp (Participant: 68200061, Visit: 1500)
Successfully processed
Processing file 34 of 40: 068200062-1511.wsp (Participant: 68200062, Visit: 1511)
Successfully processed
Processing file 35 of 40: 068200020-2120.wsp (Participant: 68200020, Visit: 2120)
Successfully processed
Processing file 36 of 40: 068200127-1000.wsp (Participant: 68200127, Visit: 1000)
Successfully processed
Processing file 37 of 40: 068200008-2120.wsp (Participant: 68200008, Visit: 2120)
Successfully processed
Processing file 38 of 40: 068200150-1000.wsp (Participant: 68200150, Visit: 1000)
Successfully processed
Processing file 39 of 40: 068200023-2120.wsp (Participant: 68200023, Visit: 2120)
Successfully processed
Processing file 40 of 40: 068200127-1100.wsp (Participant: 68200127, Visit: 1100)
Successfully processed
Saved batch results to gating_results_batch_31_to_40.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 5 of 28 ====
Files 41 to 50 of 272 
Processing file 41 of 50: 068200085-1500.wsp (Participant: 68200085, Visit: 1500)
Successfully processed
Processing file 42 of 50: 068200088-1500.wsp (Participant: 68200088, Visit: 1500)
Successfully processed
Processing file 43 of 50: 068200131-1000.wsp (Participant: 68200131, Visit: 1000)
Successfully processed
Processing file 44 of 50: 068200153-1000.wsp (Participant: 68200153, Visit: 1000)
Successfully processed
Processing file 45 of 50: 068200028-2120.wsp (Participant: 68200028, Visit: 2120)
Successfully processed
Processing file 46 of 50: 068200131-1100.wsp (Participant: 68200131, Visit: 1100)
Successfully processed
Processing file 47 of 50: 068200153-1100.wsp (Participant: 68200153, Visit: 1100)
Successfully processed
Processing file 48 of 50: 068200118-1500.wsp (Participant: 68200118, Visit: 1500)
Successfully processed
Processing file 49 of 50: 068200087-1511.wsp (Participant: 68200087, Visit: 1511)
Successfully processed
Processing file 50 of 50: 068200044-2120.wsp (Participant: 68200044, Visit: 2120)
Successfully processed
Saved batch results to gating_results_batch_41_to_50.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 6 of 28 ====
Files 51 to 60 of 272 
Processing file 51 of 60: 068200191-1000.wsp (Participant: 68200191, Visit: 1000)
Successfully processed
Processing file 52 of 60: 068200204-1000.wsp (Participant: 68200204, Visit: 1000)
Successfully processed
Processing file 53 of 60: 068200127-1500.wsp (Participant: 68200127, Visit: 1500)
Successfully processed
Processing file 54 of 60: 068200050-2120.wsp (Participant: 68200050, Visit: 2120)
Successfully processed
Processing file 55 of 60: 068200214-1000.wsp (Participant: 68200214, Visit: 1000)
Successfully processed
Processing file 56 of 60: 068200058-2120.wsp (Participant: 68200058, Visit: 2120)
Successfully processed
Processing file 57 of 60: 068200150-1500.wsp (Participant: 68200150, Visit: 1500)
Successfully processed
Processing file 58 of 60: 068200204-1100.wsp (Participant: 68200204, Visit: 1100)
Successfully processed
Processing file 59 of 60: 068200214-1100.wsp (Participant: 68200214, Visit: 1100)
Successfully processed
Processing file 60 of 60: 068200131-1500.wsp (Participant: 68200131, Visit: 1500)
Successfully processed
Saved batch results to gating_results_batch_51_to_60.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 7 of 28 ====
Files 61 to 70 of 272 
Processing file 61 of 70: 068200179-1000.wsp (Participant: 68200179, Visit: 1000)
Successfully processed
Processing file 62 of 70: 068200061-2120.wsp (Participant: 68200061, Visit: 2120)
Successfully processed
Processing file 63 of 70: 068200153-1500.wsp (Participant: 68200153, Visit: 1500)
Successfully processed
Processing file 64 of 70: 068200179-1100.wsp (Participant: 68200179, Visit: 1100)
Successfully processed
Processing file 65 of 70: 068200085-2120.wsp (Participant: 68200085, Visit: 2120)
Successfully processed
Processing file 66 of 70: 068200224-1000.wsp (Participant: 68200224, Visit: 1000)
Successfully processed
Processing file 67 of 70: 068200088-2120.wsp (Participant: 68200088, Visit: 2120)
Successfully processed
Processing file 68 of 70: 068200224-1100.wsp (Participant: 68200224, Visit: 1100)
Successfully processed
Processing file 69 of 70: 068200087-2120.wsp (Participant: 68200087, Visit: 2120)
Successfully processed
Processing file 70 of 70: 068200191-1500.wsp (Participant: 68200191, Visit: 1500)
Successfully processed
Saved batch results to gating_results_batch_61_to_70.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 8 of 28 ====
Files 71 to 80 of 272 
Processing file 71 of 80: 068200225-1000.wsp (Participant: 68200225, Visit: 1000)
Successfully processed
Processing file 72 of 80: 068200239-1000.wsp (Participant: 68200239, Visit: 1000)
Successfully processed
Processing file 73 of 80: 068200204-1500.wsp (Participant: 68200204, Visit: 1500)
FCS not found for sample Samples_CD86 FMO_005.fcs_0000000000000136047 from searching the file extension: .fcs
FCS not found for sample Samples_CD80 FMO_004.fcs_0000000000000135965 from searching the file extension: .fcs
FCS not found for sample Samples_CD40 FMO_003.fcs_0000000000000166595 from searching the file extension: .fcs
FCS not found for sample Samples_CD38 FMO_006.fcs_0000000000000135989 from searching the file extension: .fcs
Successfully processed
Processing file 74 of 80: 068200118-2120.wsp (Participant: 68200118, Visit: 2120)
Successfully processed
Processing file 75 of 80: 068200225-1100.wsp (Participant: 68200225, Visit: 1100)
Successfully processed
Processing file 76 of 80: 068200214-1500.wsp (Participant: 68200214, Visit: 1500)
Successfully processed
Processing file 77 of 80: 068200229-1000.wsp (Participant: 68200229, Visit: 1000)
Successfully processed
Processing file 78 of 80: 068200247-1000.wsp (Participant: 68200247, Visit: 1000)
Successfully processed
Processing file 79 of 80: 068200240-1000_July.wsp (Participant: 68200240, Visit: 1000)
FCS not found for sample Samples_CD86 FMO_005.fcs_0000000000000176650 from searching the file extension: .fcs
FCS not found for sample Samples_CD80 FMO_004.fcs_0000000000000177100 from searching the file extension: .fcs
FCS not found for sample Samples_CD40 FMO_003.fcs_0000000000000208036 from searching the file extension: .fcs
FCS not found for sample Samples_CD38 FMO_006.fcs_0000000000000197015 from searching the file extension: .fcs
Successfully processed
Processing file 80 of 80: 068200248-1000_July.wsp (Participant: 68200248, Visit: 1000)
FCS not found for sample Samples_CD86 FMO_005.fcs_0000000000000176650 from searching the file extension: .fcs
FCS not found for sample Samples_CD80 FMO_004.fcs_0000000000000177100 from searching the file extension: .fcs
FCS not found for sample Samples_CD40 FMO_003.fcs_0000000000000208036 from searching the file extension: .fcs
FCS not found for sample Samples_CD38 FMO_006.fcs_0000000000000197015 from searching the file extension: .fcs
Successfully processed
Saved batch results to gating_results_batch_71_to_80.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 9 of 28 ====
Files 81 to 90 of 272 
Processing file 81 of 90: 068200179-1500.wsp (Participant: 68200179, Visit: 1500)
Successfully processed
Processing file 82 of 90: 068200239-1200.wsp (Participant: 68200239, Visit: 1200)
Successfully processed
Processing file 83 of 90: 068200229-1100.wsp (Participant: 68200229, Visit: 1100)
Successfully processed
Processing file 84 of 90: 068200248-1100.wsp (Participant: 68200248, Visit: 1100)
Successfully processed
Processing file 85 of 90: 068200253-1000.wsp (Participant: 68200253, Visit: 1000)
Successfully processed
Processing file 86 of 90: 068200240-1100.wsp (Participant: 68200240, Visit: 1100)
Successfully processed
Processing file 87 of 90: 068200127-2120.wsp (Participant: 68200127, Visit: 2120)
Successfully processed
Processing file 88 of 90: 068200150-2120.wsp (Participant: 68200150, Visit: 2120)
Successfully processed
Processing file 89 of 90: 068200252-1100.wsp (Participant: 68200252, Visit: 1100)
Successfully processed
Processing file 90 of 90: 068200224-1500.wsp (Participant: 68200224, Visit: 1500)
Successfully processed
Saved batch results to gating_results_batch_81_to_90.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 10 of 28 ====
Files 91 to 100 of 272 
Processing file 91 of 100: 068200266-1000.wsp (Participant: 68200266, Visit: 1000)
Successfully processed
Processing file 92 of 100: 068200131-2120.wsp (Participant: 68200131, Visit: 2120)
Successfully processed
Processing file 93 of 100: 068200153-2120.wsp (Participant: 68200153, Visit: 2120)
Successfully processed
Processing file 94 of 100: 068200282-1000.wsp (Participant: 68200282, Visit: 1000)
Successfully processed
Processing file 95 of 100: 068200285-1000.wsp (Participant: 68200285, Visit: 1000)
Successfully processed
Processing file 96 of 100: 068200225-1500.wsp (Participant: 68200225, Visit: 1500)
Successfully processed
Processing file 97 of 100: 068200239-1500.wsp (Participant: 68200239, Visit: 1500)
Successfully processed
Processing file 98 of 100: 068200282-1100.wsp (Participant: 68200282, Visit: 1100)
Successfully processed
Processing file 99 of 100: 068200285-1100.wsp (Participant: 68200285, Visit: 1100)
Successfully processed
Processing file 100 of 100: 068200229-1500.wsp (Participant: 68200229, Visit: 1500)
Successfully processed
Saved batch results to gating_results_batch_91_to_100.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 11 of 28 ====
Files 101 to 110 of 272 
Processing file 101 of 110: 068200310-1000.wsp (Participant: 68200310, Visit: 1000)
Successfully processed
Processing file 102 of 110: 068200191-2120.wsp (Participant: 68200191, Visit: 2120)
Successfully processed
Processing file 103 of 110: 068200240-1500.wsp (Participant: 68200240, Visit: 1500)
Successfully processed
Processing file 104 of 110: 068200248-1500.wsp (Participant: 68200248, Visit: 1500)
Successfully processed
Processing file 105 of 110: 068200204-2120.wsp (Participant: 68200204, Visit: 2120)
Successfully processed
Processing file 106 of 110: 068200252-1500.wsp (Participant: 68200252, Visit: 1500)
Successfully processed
Processing file 107 of 110: 068200253-1500.wsp (Participant: 68200253, Visit: 1500)
Successfully processed
Processing file 108 of 110: 068200281-1000.wsp (Participant: 68200281, Visit: 1000)
Successfully processed
Processing file 109 of 110: 068200214-2120.wsp (Participant: 68200214, Visit: 2120)
Successfully processed
Processing file 110 of 110: 068200310-1200.wsp (Participant: 68200310, Visit: 1200)
Successfully processed
Saved batch results to gating_results_batch_101_to_110.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 12 of 28 ====
Files 111 to 120 of 272 
Processing file 111 of 120: 068200281-1100.wsp (Participant: 68200281, Visit: 1100)
Successfully processed
Processing file 112 of 120: 068200179-2120.wsp (Participant: 68200179, Visit: 2120)
Successfully processed
Processing file 113 of 120: 068200347-1000.wsp (Participant: 68200347, Visit: 1000)
Successfully processed
Processing file 114 of 120: 068200350-1000.wsp (Participant: 68200350, Visit: 1000)
Successfully processed
Processing file 115 of 120: 068200364-1000.wsp (Participant: 68200364, Visit: 1000)
Successfully processed
Processing file 116 of 120: 068200357-1000.wsp (Participant: 68200357, Visit: 1000)
Successfully processed
Processing file 117 of 120: 068200363-1000.wsp (Participant: 68200363, Visit: 1000)
Successfully processed
Processing file 118 of 120: 068200366-1000.wsp (Participant: 68200366, Visit: 1000)
Successfully processed
Processing file 119 of 120: 068200282-1500.wsp (Participant: 68200282, Visit: 1500)
Successfully processed
Processing file 120 of 120: 068200285-1500.wsp (Participant: 68200285, Visit: 1500)
Successfully processed
Saved batch results to gating_results_batch_111_to_120.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 13 of 28 ====
Files 121 to 130 of 272 
Processing file 121 of 130: 068200347-1100.wsp (Participant: 68200347, Visit: 1100)
Successfully processed
Processing file 122 of 130: 068200350-1100.wsp (Participant: 68200350, Visit: 1100)
Successfully processed
Processing file 123 of 130: 068200357-1100.wsp (Participant: 68200357, Visit: 1100)
Successfully processed
Processing file 124 of 130: 068200361-1000.wsp (Participant: 68200361, Visit: 1000)
Successfully processed
Processing file 125 of 130: 068200363-1100.wsp (Participant: 68200363, Visit: 1100)
Successfully processed
Processing file 126 of 130: 068200366-1100.wsp (Participant: 68200366, Visit: 1100)
Successfully processed
Processing file 127 of 130: 068200385-1000.wsp (Participant: 68200385, Visit: 1000)
Successfully processed
Processing file 128 of 130: 068200387-1000.wsp (Participant: 68200387, Visit: 1000)
Successfully processed
Processing file 129 of 130: 068200392-1000.wsp (Participant: 68200392, Visit: 1000)
Successfully processed
Processing file 130 of 130: 068200278-1000.wsp (Participant: 68200278, Visit: 1000)
Successfully processed
Saved batch results to gating_results_batch_121_to_130.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 14 of 28 ====
Files 131 to 140 of 272 
Processing file 131 of 140: 068200398-1000.wsp (Participant: 68200398, Visit: 1000)
Successfully processed
Processing file 132 of 140: 068200310-1500.wsp (Participant: 68200310, Visit: 1500)
Successfully processed
Processing file 133 of 140: 068200358-1000.wsp (Participant: 68200358, Visit: 1000)
Successfully processed
Processing file 134 of 140: 068200421-1000.wsp (Participant: 68200421, Visit: 1000)
Successfully processed
Processing file 135 of 140: 068200356-1000.wsp (Participant: 68200356, Visit: 1000)
Successfully processed
Processing file 136 of 140: 068200378-1000.wsp (Participant: 68200378, Visit: 1000)
Successfully processed
Processing file 137 of 140: 068200380-1000.wsp (Participant: 68200380, Visit: 1000)
Successfully processed
Processing file 138 of 140: 068200399-1000.wsp (Participant: 68200399, Visit: 1000)
Successfully processed
Processing file 139 of 140: 068200422-1000.wsp (Participant: 68200422, Visit: 1000)
Successfully processed
Processing file 140 of 140: 068200364-1200.wsp (Participant: 68200364, Visit: 1200)
Successfully processed
Saved batch results to gating_results_batch_131_to_140.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 15 of 28 ====
Files 141 to 150 of 272 
Processing file 141 of 150: 068200379-1000.wsp (Participant: 68200379, Visit: 1000)
Successfully processed
Processing file 142 of 150: 068200385-1100.wsp (Participant: 68200385, Visit: 1100)
Successfully processed
Processing file 143 of 150: 068200387-1100.wsp (Participant: 68200387, Visit: 1100)
Successfully processed
Processing file 144 of 150: 068200392-1100.wsp (Participant: 68200392, Visit: 1100)
Successfully processed
Processing file 145 of 150: 068200370-1000.wsp (Participant: 68200370, Visit: 1000)
Successfully processed
Processing file 146 of 150: 068200373-1000.wsp (Participant: 68200373, Visit: 1000)
Successfully processed
Processing file 147 of 150: 068200375-1000.wsp (Participant: 68200375, Visit: 1000)
Successfully processed
Processing file 148 of 150: 068200278-1100.wsp (Participant: 68200278, Visit: 1100)
Successfully processed
Processing file 149 of 150: 068200356-1100.wsp (Participant: 68200356, Visit: 1100)
Successfully processed
Processing file 150 of 150: 068200388-1000.wsp (Participant: 68200388, Visit: 1000)
Successfully processed
Saved batch results to gating_results_batch_141_to_150.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 16 of 28 ====
Files 151 to 160 of 272 
Processing file 151 of 160: 068200398-1100.wsp (Participant: 68200398, Visit: 1100)
Successfully processed
Processing file 152 of 160: 068200409-1000.wsp (Participant: 68200409, Visit: 1000)
Successfully processed
Processing file 153 of 160: 068200415-1000.wsp (Participant: 68200415, Visit: 1000)
Successfully processed
Processing file 154 of 160: 068200378-1100.wsp (Participant: 68200378, Visit: 1100)
Successfully processed
Processing file 155 of 160: 068200421-1100.wsp (Participant: 68200421, Visit: 1100)
Successfully processed
Processing file 156 of 160: 068200399-1100.wsp (Participant: 68200399, Visit: 1100)
Successfully processed
Processing file 157 of 160: 068200412-1000.wsp (Participant: 68200412, Visit: 1000)
Successfully processed
Processing file 158 of 160: 068200414-1000.wsp (Participant: 68200414, Visit: 1000)
Successfully processed
Processing file 159 of 160: 068200422-1100.wsp (Participant: 68200422, Visit: 1100)
Successfully processed
Processing file 160 of 160: 068200441-1000.wsp (Participant: 68200441, Visit: 1000)
Successfully processed
Saved batch results to gating_results_batch_151_to_160.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 17 of 28 ====
Files 161 to 170 of 272 
Processing file 161 of 170: 068200380-1100.wsp (Participant: 68200380, Visit: 1100)
Successfully processed
Processing file 162 of 170: 068200397-1000.wsp (Participant: 68200397, Visit: 1000)
Successfully processed
Processing file 163 of 170: 068200438-1000.wsp (Participant: 68200438, Visit: 1000)
Successfully processed
Processing file 164 of 170: 068200281-1500.wsp (Participant: 68200281, Visit: 1500)
Successfully processed
Processing file 165 of 170: 068200361-1200.wsp (Participant: 68200361, Visit: 1200)
Successfully processed
Processing file 166 of 170: 068200373-1100.wsp (Participant: 68200373, Visit: 1100)
Successfully processed
Processing file 167 of 170: 068200375-1100.wsp (Participant: 68200375, Visit: 1100)
Successfully processed
Processing file 168 of 170: 068200388-1100.wsp (Participant: 68200388, Visit: 1100)
Successfully processed
Processing file 169 of 170: 068200407-1000.wsp (Participant: 68200407, Visit: 1000)
Successfully processed
Processing file 170 of 170: 068200409-1100.wsp (Participant: 68200409, Visit: 1100)
Successfully processed
Saved batch results to gating_results_batch_161_to_170.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 18 of 28 ====
Files 171 to 180 of 272 
Processing file 171 of 180: 068200415-1100.wsp (Participant: 68200415, Visit: 1100)
Successfully processed
Processing file 172 of 180: 068200225-2120.wsp (Participant: 68200225, Visit: 2120)
Successfully processed
Processing file 173 of 180: 068200229-2120.wsp (Participant: 68200229, Visit: 2120)
Successfully processed
Processing file 174 of 180: 068200239-2120.wsp (Participant: 68200239, Visit: 2120)
Successfully processed
Processing file 175 of 180: 068200247-2120.wsp (Participant: 68200247, Visit: 2120)
Successfully processed
Processing file 176 of 180: 068200425-1000.wsp (Participant: 68200425, Visit: 1000)
Successfully processed
Processing file 177 of 180: 068200439-1000.wsp (Participant: 68200439, Visit: 1000)
Successfully processed
Processing file 178 of 180: 068200358-1200.wsp (Participant: 68200358, Visit: 1200)
Successfully processed
Processing file 179 of 180: 068200412-1100.wsp (Participant: 68200412, Visit: 1100)
Successfully processed
Processing file 180 of 180: 068200414-1100.wsp (Participant: 68200414, Visit: 1100)
Successfully processed
Saved batch results to gating_results_batch_171_to_180.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 19 of 28 ====
Files 181 to 190 of 272 
Processing file 181 of 190: 068200428-1000.wsp (Participant: 68200428, Visit: 1000)
Successfully processed
Processing file 182 of 190: 068200441-1100.wsp (Participant: 68200441, Visit: 1100)
Successfully processed
Processing file 183 of 190: 068200240-2120.wsp (Participant: 68200240, Visit: 2120)
Successfully processed
Processing file 184 of 190: 068200248-2120.wsp (Participant: 68200248, Visit: 2120)
Successfully processed
Processing file 185 of 190: 068200397-1100.wsp (Participant: 68200397, Visit: 1100)
Successfully processed
Processing file 186 of 190: 068200379-1200.wsp (Participant: 68200379, Visit: 1200)
Successfully processed
Processing file 187 of 190: 068200370-1200.wsp (Participant: 68200370, Visit: 1200)
Successfully processed
Processing file 188 of 190: 068200407-1100.wsp (Participant: 68200407, Visit: 1100)
Successfully processed
Processing file 189 of 190: 068200457-1000.wsp (Participant: 68200457, Visit: 1000)
Successfully processed
Processing file 190 of 190: 068200252-2120.wsp (Participant: 68200252, Visit: 2120)
Successfully processed
Saved batch results to gating_results_batch_181_to_190.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 20 of 28 ====
Files 191 to 200 of 272 
Processing file 191 of 200: 068200425-1100.wsp (Participant: 68200425, Visit: 1100)
Successfully processed
Processing file 192 of 200: 068200439-1100.wsp (Participant: 68200439, Visit: 1100)
Successfully processed
Processing file 193 of 200: 068200347-1500.wsp (Participant: 68200347, Visit: 1500)
Successfully processed
Processing file 194 of 200: 068200428-1100.wsp (Participant: 68200428, Visit: 1100)
Successfully processed
Processing file 195 of 200: 068200364-1500.wsp (Participant: 68200364, Visit: 1500)
Successfully processed
Processing file 196 of 200: 068200438-1200.wsp (Participant: 68200438, Visit: 1200)
Successfully processed
Processing file 197 of 200: 068200357-1500.wsp (Participant: 68200357, Visit: 1500)
Successfully processed
Processing file 198 of 200: 068200363-1500.wsp (Participant: 68200363, Visit: 1500)
Successfully processed
Processing file 199 of 200: 068200366-1500.wsp (Participant: 68200366, Visit: 1500)
Successfully processed
Processing file 200 of 200: 068200385-1500.wsp (Participant: 68200385, Visit: 1500)
Successfully processed
Saved batch results to gating_results_batch_191_to_200.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 21 of 28 ====
Files 201 to 210 of 272 
Processing file 201 of 210: 068200387-1500.wsp (Participant: 68200387, Visit: 1500)
Successfully processed
Processing file 202 of 210: 068200361-1500.wsp (Participant: 68200361, Visit: 1500)
Successfully processed
Processing file 203 of 210: 068200392-1500.wsp (Participant: 68200392, Visit: 1500)
Successfully processed
Processing file 204 of 210: 068200278-1500.wsp (Participant: 68200278, Visit: 1500)
Successfully processed
Processing file 205 of 210: 068200358-1500.wsp (Participant: 68200358, Visit: 1500)
Successfully processed
Processing file 206 of 210: 068200378-1500.wsp (Participant: 68200378, Visit: 1500)
Successfully processed
Processing file 207 of 210: 068200398-1500.wsp (Participant: 68200398, Visit: 1500)
Successfully processed
Processing file 208 of 210: 068200421-1500.wsp (Participant: 68200421, Visit: 1500)
Successfully processed
Processing file 209 of 210: 068200399-1500.wsp (Participant: 68200399, Visit: 1500)
Successfully processed
Processing file 210 of 210: 068200422-1500.wsp (Participant: 68200422, Visit: 1500)
Successfully processed
Saved batch results to gating_results_batch_201_to_210.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 22 of 28 ====
Files 211 to 220 of 272 
Processing file 211 of 220: 068200285-2120.wsp (Participant: 68200285, Visit: 2120)
Successfully processed
Processing file 212 of 220: 068200356-1500.wsp (Participant: 68200356, Visit: 1500)
Successfully processed
Processing file 213 of 220: 068200379-1500.wsp (Participant: 68200379, Visit: 1500)
Successfully processed
Processing file 214 of 220: 068200380-1500.wsp (Participant: 68200380, Visit: 1500)
Successfully processed
Processing file 215 of 220: 068200370-1500.wsp (Participant: 68200370, Visit: 1500)
Successfully processed
Processing file 216 of 220: 068200373-1500.wsp (Participant: 68200373, Visit: 1500)
Successfully processed
Processing file 217 of 220: 068200375-1500.wsp (Participant: 68200375, Visit: 1500)
Successfully processed
Processing file 218 of 220: 068200388-1500.wsp (Participant: 68200388, Visit: 1500)
Successfully processed
Processing file 219 of 220: 068200409-1500.wsp (Participant: 68200409, Visit: 1500)
Successfully processed
Processing file 220 of 220: 068200415-1500.wsp (Participant: 68200415, Visit: 1500)
Successfully processed
Saved batch results to gating_results_batch_211_to_220.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 23 of 28 ====
Files 221 to 230 of 272 
Processing file 221 of 230: 068200412-1500.wsp (Participant: 68200412, Visit: 1500)
Successfully processed
Processing file 222 of 230: 068200414-1500.wsp (Participant: 68200414, Visit: 1500)
Successfully processed
Processing file 223 of 230: 068200441-1500.wsp (Participant: 68200441, Visit: 1500)
Successfully processed
Processing file 224 of 230: 068200282-2120.wsp (Participant: 68200282, Visit: 2120)
Successfully processed
Processing file 225 of 230: 068200438-1500.wsp (Participant: 68200438, Visit: 1500)
Successfully processed
Processing file 226 of 230: 068200397-1500.wsp (Participant: 68200397, Visit: 1500)
Successfully processed
Processing file 227 of 230: 068200425-1500.wsp (Participant: 68200425, Visit: 1500)
Successfully processed
Processing file 228 of 230: 068200439-1500.wsp (Participant: 68200439, Visit: 1500)
Successfully processed
Processing file 229 of 230: 068200428-1500.wsp (Participant: 68200428, Visit: 1500)
Successfully processed
Processing file 230 of 230: 068200407-1500.wsp (Participant: 68200407, Visit: 1500)
Successfully processed
Saved batch results to gating_results_batch_221_to_230.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 24 of 28 ====
Files 231 to 240 of 272 
Processing file 231 of 240: 068200310-2120.wsp (Participant: 68200310, Visit: 2120)
Successfully processed
Processing file 232 of 240: 068200460-1000.wsp (Participant: 68200460, Visit: 1000)
Successfully processed
Processing file 233 of 240: 068200281-2120.wsp (Participant: 68200281, Visit: 2120)
Successfully processed
Processing file 234 of 240: 068200347-2120.wsp (Participant: 68200347, Visit: 2120)
Successfully processed
Processing file 235 of 240: 068200364-2120.wsp (Participant: 68200364, Visit: 2120)
Successfully processed
Processing file 236 of 240: 068200357-2120.wsp (Participant: 68200357, Visit: 2120)
Successfully processed
Processing file 237 of 240: 068200363-2120.wsp (Participant: 68200363, Visit: 2120)
Successfully processed
Processing file 238 of 240: 068200366-2120.wsp (Participant: 68200366, Visit: 2120)
Successfully processed
Processing file 239 of 240: 068200465-1000.wsp (Participant: 68200465, Visit: 1000)
Successfully processed
Processing file 240 of 240: 068200361-2120.wsp (Participant: 68200361, Visit: 2120)
Successfully processed
Saved batch results to gating_results_batch_231_to_240.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 25 of 28 ====
Files 241 to 250 of 272 
Processing file 241 of 250: 068200385-2120.wsp (Participant: 68200385, Visit: 2120)
Successfully processed
Processing file 242 of 250: 068200278-2120.wsp (Participant: 68200278, Visit: 2120)
Successfully processed
Processing file 243 of 250: 068200358-2120.wsp (Participant: 68200358, Visit: 2120)
Successfully processed
Processing file 244 of 250: 068200398-2120.wsp (Participant: 68200398, Visit: 2120)
Successfully processed
Processing file 245 of 250: 068200421-2120.wsp (Participant: 68200421, Visit: 2120)
Successfully processed
Processing file 246 of 250: 068200460-1100.wsp (Participant: 68200460, Visit: 1100)
Successfully processed
Processing file 247 of 250: 068200356-2120.wsp (Participant: 68200356, Visit: 2120)
Successfully processed
Processing file 248 of 250: 068200378-2120.wsp (Participant: 68200378, Visit: 2120)
Successfully processed
Processing file 249 of 250: 068200380-2120.wsp (Participant: 68200380, Visit: 2120)
Successfully processed
Processing file 250 of 250: 068200370-2120.wsp (Participant: 68200370, Visit: 2120)
Successfully processed
Saved batch results to gating_results_batch_241_to_250.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 26 of 28 ====
Files 251 to 260 of 272 
Processing file 251 of 260: 068200399-2120.wsp (Participant: 68200399, Visit: 2120)
Successfully processed
Processing file 252 of 260: 068200422-2120.wsp (Participant: 68200422, Visit: 2120)
Successfully processed
Processing file 253 of 260: 068200373_2120.wsp (Participant: 68200373, Visit: 2120)
Successfully processed
Processing file 254 of 260: 068200375_2120.wsp (Participant: 68200375, Visit: 2120)
Successfully processed
Processing file 255 of 260: 068200379_2120.wsp (Participant: 68200379, Visit: 2120)
Successfully processed
Processing file 256 of 260: 068200465-1200.wsp (Participant: 68200465, Visit: 1200)
Successfully processed
Processing file 257 of 260: 068200388-2120.wsp (Participant: 68200388, Visit: 2120)
Successfully processed
Processing file 258 of 260: 068200409-2120.wsp (Participant: 68200409, Visit: 2120)
Successfully processed
Processing file 259 of 260: 068200460-1500.wsp (Participant: 68200460, Visit: 1500)
Successfully processed
Processing file 260 of 260: 068200412-2120.wsp (Participant: 68200412, Visit: 2120)
Successfully processed
Saved batch results to gating_results_batch_251_to_260.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 27 of 28 ====
Files 261 to 270 of 272 
Processing file 261 of 270: 068200414-2120.wsp (Participant: 68200414, Visit: 2120)
Successfully processed
Processing file 262 of 270: 068200441-2120.wsp (Participant: 68200441, Visit: 2120)
Successfully processed
Processing file 263 of 270: 068200465-1500.wsp (Participant: 68200465, Visit: 1500)
Successfully processed
Processing file 264 of 270: 068200397-2120.wsp (Participant: 68200397, Visit: 2120)
Successfully processed
Processing file 265 of 270: 068200415-2120.wsp (Participant: 68200415, Visit: 2120)
Successfully processed
Processing file 266 of 270: 068200438-2120.wsp (Participant: 68200438, Visit: 2120)
Successfully processed
Processing file 267 of 270: 068200407-2120.wsp (Participant: 68200407, Visit: 2120)
Successfully processed
Processing file 268 of 270: 068200425-2120.wsp (Participant: 68200425, Visit: 2120)
Successfully processed
Processing file 269 of 270: 068200428-2120.wsp (Participant: 68200428, Visit: 2120)
Successfully processed
Processing file 270 of 270: 068200439-2120.wsp (Participant: 68200439, Visit: 2120)
Successfully processed
Saved batch results to gating_results_batch_261_to_270.RData 
Updated overall results file: gating_results_all.RData

==== Processing Batch 28 of 28 ====
Files 271 to 272 of 272 
Processing file 271 of 272: 068200460-2120.wsp (Participant: 68200460, Visit: 2120)
Successfully processed
Processing file 272 of 272: 068200465-2120.wsp (Participant: 68200465, Visit: 2120)
Successfully processed
Saved batch results to gating_results_batch_271_to_272.RData 
Updated overall results file: gating_results_all.RData

Creating the combined count table

Code
# Output directory for the combined (MGH + CAP) results.
out_dir <- str_c(output_dir, "both_combined/")
# Create it if needed; recursive = TRUE (as for output_dir) so the call also
# succeeds when the parent folder does not exist yet.
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}

Since the gating structure differed between the two sites, and sometimes between samples within a site, we load the gate_mapping_completed.csv file that Susan created, which contains the harmonized gating structure for all samples. This mapping ensures we can combine the results from both sites correctly.

Code
# Read the harmonized gate mapping and relabel the "SA" site as "CAP" so the
# site labels match the ones used in the rest of this document.
gate_mapping <- mutate(
  read.csv(
    str_c(get_data_dir(), "06 Flow cytometry/gate_mapping_completed.csv")
  ),
  site = str_replace(site, "SA", "CAP")
)
Code
# Display the full gate mapping as a formatted table.
gt::gt(gate_mapping)
site original_gate standardized_name cell_type notes
MGH /Lymphocytes NA NA NA
MGH /Lymphocytes/Singlets NA NA NA
MGH /Lymphocytes/Singlets/Live cells Live Live NA
MGH /Lymphocytes/Singlets/Live cells/Granulocytes Granulocytes Granulocytes NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells NA NA NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells NA NA NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/B cells B_cells B_cells NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/non T cell_B cells NA NA NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/non T cell_B cells/HLA, SSC-A subset NA NA NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/non T cell_B cells/HLA, SSC-A subset/APC APC APC NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/non T cell_B cells/HLA, SSC-A subset/APC/APC_CD40+ CD40 CD40 NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/non T cell_B cells/HLA, SSC-A subset/APC/APC_CD80+ CD80 CD80 NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/non T cell_B cells/HLA, SSC-A subset/APC/APC_CD86+ CD86 CD86 NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/T cells NA NA NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD4 T cells CD4 CD4 NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD4 T cells/CD4_CD38 NA NA NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD4 T cells/CD4_HLADR NA NA NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD4 T cells/CD4_HLADR-CD38- NA NA NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD4 T cells/CD4_HLADRCD38 DblePosCD4 DblePosCD4 NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD8 T cells CD8 CD8 NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD8 T cells/CD8_CD38 NA NA NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD8 T cells/CD8_HLADR NA NA NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD8 T cells/CD8_HLADR-CD38- NA NA NA
MGH /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD8 T cells/CD8_HLADRCD38 DblePosCD8 DblePosCD8 NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset Live Live NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Granulocytes Granulocytes Granulocytes NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells NA NA NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/Non T cells NA NA NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/Non T cells/B cells B_cells B_cells NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/Non T cells/non T cell_B cells NA NA NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/Non T cells/non T cell_B cells/HLA, SSC-A subset NA NA NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/Non T cells/non T cell_B cells/HLA, SSC-A subset/APC APC APC NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/Non T cells/non T cell_B cells/HLA, SSC-A subset/APC/APC_CD40+ CD40 CD40 NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/Non T cells/non T cell_B cells/HLA, SSC-A subset/APC/APC_CD80+ CD80 CD80 NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/Non T cells/non T cell_B cells/HLA, SSC-A subset/APC/APC_CD86+ CD86 CD86 NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/T cells NA NA NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/T cells/CD4 T cells CD4 CD4 NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/T cells/CD4 T cells/CD4_CD38 NA NA NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/T cells/CD4 T cells/CD4_HLADR NA NA NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/T cells/CD4 T cells/CD4_HLADR-CD38- NA NA NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/T cells/CD4 T cells/CD4_HLADRCD38 DblePosCD4 DblePosCD4 NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/T cells/CD8 T cells CD8 CD8 NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/T cells/CD8 T cells/CD8_CD38 NA NA NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/T cells/CD8 T cells/CD8_HLADR NA NA NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/T cells/CD8 T cells/CD8_HLADR-CD38- NA NA NA
MGH /Lymphocytes/Singlets/Comp-LiveDead Aqua-A, SSC-A subset/Immune cells/T cells/CD8 T cells/CD8_HLADRCD38 DblePosCD8 DblePosCD8 NA
CAP /Lymphocytes NA NA NA
CAP /Lymphocytes/Singlets NA NA NA
CAP /Lymphocytes/Singlets/Live cells Live Live NA
CAP /Lymphocytes/Singlets/Live cells/Granulocytes Granulocytes Granulocytes NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells NA NA NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells NA NA NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/B cells B_cells B_cells NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/non T cells_B cells NA NA NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/non T cells_B cells/HLA-DR, SSC-A subset NA NA NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/non T cells_B cells/HLA-DR, SSC-A subset/APC APC APC NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/non T cells_B cells/HLA-DR, SSC-A subset/APC/APC_CD40+ CD40 CD40 NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/non T cells_B cells/HLA-DR, SSC-A subset/APC/APC_CD80+ CD80 CD80 NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/Non T cells/non T cells_B cells/HLA-DR, SSC-A subset/APC/APC_CD86+ CD86 CD86 NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/T cells NA NA NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD4 T cells CD4 CD4 NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD4 T cells/CD4_CD38 NA NA NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD4 T cells/CD4_HLADR NA NA NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD4 T cells/CD4_HLADR-CD38- NA NA NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD4 T cells/CD4_HLADRCD38 DblePosCD4 DblePosCD4 NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD8 T cells CD8 CD8 NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD8 T cells/CD8_CD38 NA NA NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD8 T cells/CD8_HLADR NA NA NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD8 T cells/CD8_HLADR-CD38- NA NA NA
CAP /Lymphocytes/Singlets/Live cells/Immune cells/T cells/CD8 T cells/CD8_HLADRCD38 DblePosCD8 DblePosCD8 NA
CAP /Lymphocytes/Single Cells NA NA NA
CAP /Lymphocytes/Single Cells/Live cells Live Live NA
CAP /Lymphocytes/Single Cells/Live cells/Granulocytes Granulocytes Granulocytes NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells NA NA NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/Non T cells NA NA NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/Non T cells/B cells B_cells B_cells NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/Non T cells/Non T cell B cells NA NA NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/Non T cells/Non T cell B cells/HLA, SSC-A subset NA NA NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/Non T cells/Non T cell B cells/HLA, SSC-A subset/APC APC APC NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/Non T cells/Non T cell B cells/HLA, SSC-A subset/APC/CD40+ CD40 CD40 NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/Non T cells/Non T cell B cells/HLA, SSC-A subset/APC/CD80+ CD80 CD80 NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/Non T cells/Non T cell B cells/HLA, SSC-A subset/APC/CD86+ CD86 CD86 NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/T cells NA NA NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/T cells/CD4 T cells CD4 CD4 NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/T cells/CD4 T cells/CD4_CD38 NA NA NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/T cells/CD4 T cells/CD4_HLADR NA NA NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/T cells/CD4 T cells/CD4_HLADR-CD38- NA NA NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/T cells/CD4 T cells/CD4_HLADRCD38 DblePosCD4 DblePosCD4 NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/T cells/CD8 T cells CD8 CD8 NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/T cells/CD8 T cells/CD8_CD38 NA NA NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/T cells/CD8 T cells/CD8_HLADR NA NA NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/T cells/CD8 T cells/CD8_HLADR-CD38 NA NA NA
CAP /Lymphocytes/Single Cells/Live cells/Immune cells/T cells/CD8 T cells/CD8_HLADRCD38 DblePosCD8 DblePosCD8 NA
Code
# Combine the per-site gating results using the harmonized gate mapping.
# The CAP results file is passed through the `sa_file` argument.
results <- aggregate_flow_data(
  gate_mapping = gate_mapping,
  output_dir = out_dir,
  mgh_file = str_c(output_dir, "MGH/step 03/gating_results_all.RData"),
  sa_file = str_c(output_dir, "CAP/step 03/gating_results_all.RData")
)
Loading objects:
  all_results
Loading objects:
  all_results
Processing MGH data...
Processing SA data...
Results saved to: /Users/laurasymul/OneDrive - UCL/Academia/Research/VIBRANT data UCLouvain/actual data/01 Preprocessed and QCed/06 Flow cytometry tmp files/both_combined/ 
Code
# Keep only the combined table from the aggregation results.
flow_data <- results[["all_data"]]

# Alternative: reload previously saved results instead of re-aggregating.
# results <- load(str_c(out_dir, "combined_flow_results.RData"), verbose = TRUE)
# flow_data <- all_data

For the MGH data, we add the machine recorded for each sample in the MGH sample list.

Code
# Translate visit labels (e.g. "V1", " V9 ") into visit codes.
#
# Args:
#   visits: vector of visit labels; all whitespace is stripped before the
#     lookup, so "V 1" and "V1" are treated alike.
#
# Returns:
#   An unnamed character vector of the same length as `visits` holding the
#   visit code of each label. Labels that are not in the lookup table (and NA
#   inputs) yield NA.
get_visit_code <- function(visits) {
  visit_codes <- c(
    "V1" = "1000",
    "V2" = "1100",
    "V3" = "1200",
    "V6" = "1500",
    "V7" = "1700",
    "V9" = "2120"
  )

  labels <- str_remove_all(visits, "\\s+")

  # Direct lookup by position: match() returns NA for unknown or missing
  # labels, which then index to NA, mirroring the result of a left join
  # without depending on how as.data.frame() names its column.
  unname(visit_codes[match(labels, names(visit_codes))])
}

# Attach the machine listed in the MGH sample list to the gating results,
# matching on participant id and visit code. Rows without a match in
# mgh_samples get machine = NA.
flow_data <- dplyr::left_join(
  flow_data,
  mgh_samples |>
    mutate(
      # "068-xx-xxxx" style subject ids become "68xxxxxx" to match flow_data
      pid = str_replace(str_remove_all(Subject, "-"), "^068", "68"),
      visit_code = get_visit_code(Visit),
      machine = Machine
    ) |>
    select(pid, visit_code, machine),
  by = join_by(pid, visit_code)
)

SummarizedExperiment object

Creating the SummarizedExperiment object

We now create a SummarizedExperiment object that contains the combined gating results for all samples. This will allow us to integrate the flow cytometry data into the VIBRANT MultiAssayExperiment object.

The features of the SE object will be the harmonized cell_type, and the corresponding rowData will contain the

  • parent_cell_type (to be constructed from the parent_gate)

The samples will be the VIBRANT uid (concatenation of pid and visit_code), and the corresponding colData will contain the

  • sample name (fcs file)
  • machine (for MGH samples, NA for CAP samples)

The SE will contain 2 assays

  • count with the event count for each cell type

  • percentage with the percentage of each cell type relative to its parent gate

In addition, we will add the gate_mapping to the metadata of the SE object, which will allow us to trace back the gating structure for each cell type if needed.

Code
# Build the SummarizedExperiment from the combined, harmonized gating results.
# NOTE(review): judging from the warning this call emits, when several samples
# share a uid only the first one is kept — confirm this is the intended choice.
se_flow <- create_SE_from_harmonized_flow_data(flow_data = flow_data)
Warning in create_SE_from_harmonized_flow_data(flow_data = flow_data): There
are multiple samples for the same uid.
# A tibble: 10 × 3
   uid            sample                                                       n
   <chr>          <chr>                                                    <int>
 1 068100026_2120 Date_MGH_Cyto_068_10_0026_V9_007.fcs_0000000000007392714     5
 2 068100026_2120 Date_CD86_006.fcs_0000000000000050000                        5
 3 068100026_2120 Date_CD80_003.fcs_0000000000000050000                        5
 4 068100026_2120 Date_CD40_005.fcs_0000000000000050000                        5
 5 068100026_2120 Date_CD38_004.fcs_0000000000000050000                        5
 6 068100058_1100 11_14_2024_MGH_Cyto_068_10_0058_V2_007.fcs_000000000000…     5
 7 068100058_1100 11_14_2024_CD86_006.fcs_0000000000000050000                  5
 8 068100058_1100 11_14_2024_CD80_003.fcs_0000000000000050000                  5
 9 068100058_1100 11_14_2024_CD40_005.fcs_0000000000000050000                  5
10 068100058_1100 11_14_2024_CD38_004.fcs_0000000000000050000                  5
Taking the first sample for these uid(s).
Code
# Keep the gate mapping (as a tibble) in the SE metadata so that the gating
# structure behind each cell type can be traced back later.
metadata(se_flow)$gate_mapping <- as_tibble(gate_mapping)

Since all samples are clinical samples, we add the expected sample_type and control_type columns to the SE@colData.

Code
# All flow samples are clinical samples: fill in the two sample-annotation
# columns with constant values. `$<-` on a SummarizedExperiment writes to its
# colData.
se_flow$sample_type <- "Clinical sample"
se_flow$control_type <- ""

Saving the SummarizedExperiment object

Code
# Save the SE directly in the directory returned by get_01_output_dir() (the
# parent of `output_dir`), with a YYYYMMDD date stamp in the file name.
# The directory is taken from get_01_output_dir() rather than obtained by
# stripping the tmp-folder suffix from `output_dir` with a regex.
se_flow_file <-
  str_c(
    get_01_output_dir(),
    "06_se_flow_", format(today(), "%Y%m%d"), ".rds"
  )

saveRDS(se_flow, file = se_flow_file)

print(str_c("SummarizedExperiment object saved to: ", se_flow_file))
[1] "SummarizedExperiment object saved to: /Users/laurasymul/OneDrive - UCL/Academia/Research/VIBRANT data UCLouvain/actual data/01 Preprocessed and QCed/06_se_flow_20251020.rds"

Checking the agreement between available and expected data

In this section, we check the agreement between the expected data (based on CRF 35) and the available data: the samples for which we have a file (listed in the inventory) and the samples for which we have actual data (found in the SE).

Code
# Load the CRF data
# Among the files matching "01_crf_clean" in the preprocessed-output directory,
# take the one that sorts last by name (presumably the most recent date-stamped
# export -- confirm naming convention).
crf_file <-
  list.files(
    path = get_01_output_dir(),
    pattern = "01_crf_clean",
    full.names = TRUE
  ) |>
  sort(decreasing = TRUE) |>
  head(1)

# Fail with an explicit message instead of passing NA to load().
if (length(crf_file) == 0) {
  stop(
    "No file matching '01_crf_clean' found in ", get_01_output_dir(),
    call. = FALSE
  )
}

load(crf_file, verbose = TRUE)
Loading objects:
  crf_clean
Code
# Load the CRF plates dictionary.
# Among the files matching "01_crf_plates_dictionary" in the preprocessed-output
# directory, take the one that sorts last by name (presumably the most recent
# date-stamped export -- confirm naming convention).
crf_dict_file <-
  list.files(
    path = get_01_output_dir(),
    pattern = "01_crf_plates_dictionary",
    full.names = TRUE
  ) |>
  sort(decreasing = TRUE) |>
  head(1)

# Fail with an explicit message instead of passing NA to load().
if (length(crf_dict_file) == 0) {
  stop(
    "No file matching '01_crf_plates_dictionary' found in ",
    get_01_output_dir(),
    call. = FALSE
  )
}

load(crf_dict_file, verbose = TRUE)
Loading objects:
  crf_plates_dictionary
Code
# Specimen-collection fields from CRF 35, one row per participant visit.
# `visit_code` is `dfseq` left-padded with zeros to 4 characters, and `uid`
# is "<pid>_<visit_code>".
crf35 <-
  crf_clean$crf35 |>
  transmute(
    pid,
    dfseq,
    softcup,
    rectal_swab,
    endocervical_cytobrush,
    visit_code = str_pad(dfseq, width = 4, pad = "0"),
    uid = str_c(pid, "_", visit_code)
  )
Code
# Inventory
# Stack the cleaned MGH and CAP sample inventories (pid and visit_code only),
# then rebuild the uid. A "0" is prepended to pid (presumably because the
# inventories store pid without its leading zero -- confirm).
inventory <-
  str_c(
    output_dir,
    c(
      "MGH/step 02/mgh_sample_inventory_cleaned.csv",
      "CAP/step 02/sa_sample_inventory_cleaned.csv"
    )
  ) |>
  lapply(
    function(csv_path) {
      read.csv(csv_path, stringsAsFactors = FALSE) |>
        as_tibble() |>
        select(pid, visit_code)
    }
  ) |>
  bind_rows() |>
  mutate(
    pid = str_c("0", pid),
    visit_code = as.character(visit_code),
    uid = str_c(pid, "_", visit_code)
  ) |>
  distinct()
Code
# Split the uid ("<pid>_<visit_code>") back into its two parts and store them
# as sample annotations: pid is what precedes the first underscore, visit_code
# is what follows the last one.
se_flow$pid <- as.character(str_remove(se_flow$uid, "_.*$"))
se_flow$visit_code <- as.character(str_remove(se_flow$uid, ".*_"))
# Participants whose pid starts with "06810" are labelled MGH, all others CAP.
se_flow$site <- ifelse(str_detect(se_flow$pid, "^06810"), "MGH", "CAP")
Code
# Available data
# Sample-level annotations of the SE: one row per sample that has actual data.
in_se <- as_tibble(colData(se_flow))
Code
# Cross-check the three sources, one row per participant visit (uid):
#   - CRF 35 (expected specimen collection),
#   - the file inventory (a file exists),
#   - the SE (actual data were extracted).
# Presence flags are TRUE where the uid occurs in the source and FALSE otherwise.
matched <-
  full_join(
    crf35 |> mutate(in_CRF_35 = TRUE),
    inventory |> mutate(in_inventory = TRUE),
    by = join_by(pid, visit_code, uid)
  ) |>
  full_join(
    in_se |> mutate(in_se = TRUE),
    by = join_by(pid, visit_code, uid)
  ) |>
  mutate(
    # Missing CRF entries become "" so that the comparison below yields FALSE
    # rather than NA.
    endocervical_cytobrush = endocervical_cytobrush |> str_replace_na(""),
    cytobrush_collected = endocervical_cytobrush == "Checked",
    in_CRF_35 = in_CRF_35 |> replace_na(FALSE),
    in_inventory = in_inventory |> replace_na(FALSE),
    in_se = in_se |> replace_na(FALSE)
  ) |>
  # Keep only visits where a cytobrush was expected or where we have something.
  dplyr::filter(cytobrush_collected | in_inventory | in_se) |>
  mutate(
    data_status =
      case_when(
        in_inventory & in_se ~ "Available data",
        in_inventory & !in_se ~ "In inventory but no data",
        # Previously uncovered combination (would silently yield NA).
        !in_inventory & in_se ~ "Data but not in inventory",
        !in_inventory & !in_se & (as.integer(visit_code) < 1000) ~
          "No data (screening visit)",
        !in_inventory & !in_se ~ "No data"
      )
  ) |>
  select(-dfseq, -in_CRF_35) |>
  select(
    site, pid, visit_code, softcup, rectal_swab, endocervical_cytobrush,
    cytobrush_collected, everything()
  ) |>
  arrange(data_status, visit_code)
Code
# Overview of data availability: one point per participant visit, coloured by
# data status; the shape tells whether CRF 35 reports a collected cytobrush.
ggplot(
  matched,
  aes(x = pid, y = visit_code, col = data_status, shape = cytobrush_collected)
) +
  geom_point() +
  scale_shape_manual(
    name = "Cytobrush collected\naccording to CRF 35",
    values = c(1, 16)
  ) +
  scale_color_manual(
    name = "Data status",
    values = c(
      "Available data" = "dodgerblue",
      "In inventory but no data" = "orange",
      "No data" = "red",
      "No data (screening visit)" = "gray"
    )
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(y = "visit code")

Code
# Export the availability table alongside the other temporary flow outputs.
matched |>
  write_csv(file = str_c(output_dir, "flow_data_availability_check.csv"))

The samples for which we are missing data are

Code
# Table of the visits that lack data (screening visits excluded).
matched |>
  dplyr::filter(data_status %in% c("In inventory but no data", "No data")) |>
  select(-site) |>
  gt::gt()
pid visit_code Softcup Rectal swab endocervical_cytobrush cytobrush_collected uid in_inventory sample machine sample_type control_type in_se data_status
068200252 1000 Checked Checked Checked TRUE 068200252_1000 FALSE NA NA NA NA FALSE No data
068200061 1100 Unchecked Checked Checked TRUE 068200061_1100 FALSE NA NA NA NA FALSE No data
068200247 1100 Checked Checked Checked TRUE 068200247_1100 FALSE NA NA NA NA FALSE No data
068200088 1511 Unchecked Checked Checked TRUE 068200088_1511 FALSE NA NA NA NA FALSE No data

By email (June 9th 2029), Ndivhuwo confirmed that:

  • we do not have data for 068200247-1100 and 068200252-1000, these samples were discarded after contamination during processing;
  • for 068200061 we don’t have any record for visit 1100 or 1200.

For 068200088, their expected visit code is 1511 but it was likely fixed by the lab to be 1500 (to be consistent with the other assays).

Basic QC of available data

Counts

Code
# Heatmap of log10 event counts (cell type x sample), split by site.
# Samples are ordered along the x-axis by their "Live cells" count.
se_flow |>
  as_tibble() |>
  mutate(
    live_count = count[cell_type_label == "Live cells"],
    .by = uid
  ) |>
  mutate(uid = fct_reorder(uid, live_count)) |>
  ggplot(aes(x = uid, y = fct_rev(cell_type_label), fill = log10(count))) +
  geom_tile() +
  facet_grid(
    rows = vars(parent_cell_type),
    cols = vars(site),
    scales = "free",
    space = "free"
  ) +
  scale_fill_gradient(low = "white", high = "dodgerblue1", na.value = "red") +
  labs(
    x = "Samples (ordered by live cell counts)",
    y = "Cell type",
    caption = "Red cells indicate 0 count. Vertical facets show parent cell types."
  ) +
  theme(
    axis.text.x = element_blank(),
    strip.text.y = element_text(angle = 0)
  )

Code
# Heatmap of log10 event counts (cell type x sample), split by site and visit.
# Samples are ordered along the x-axis by their "Live cells" count.
se_flow |>
  as_tibble() |>
  mutate(
    live_count = count[cell_type_label == "Live cells"],
    .by = uid
  ) |>
  mutate(uid = fct_reorder(uid, live_count)) |>
  ggplot(aes(x = uid, y = fct_rev(cell_type_label), fill = log10(count))) +
  geom_tile() +
  facet_grid(
    rows = vars(parent_cell_type),
    cols = vars(site, visit_code),
    scales = "free",
    space = "free"
  ) +
  scale_fill_gradient(low = "white", high = "dodgerblue1", na.value = "red") +
  labs(
    x = "Samples (ordered by live cell counts)",
    y = "Cell type",
    caption = "Red cells indicate 0 count. Vertical facets show parent cell types."
  ) +
  theme(
    axis.text.x = element_blank(),
    strip.text.x = element_text(angle = 90),
    strip.text.y = element_text(angle = 0)
  )

Code
# log10 counts across visits, one panel per cell type; lines connect the
# samples of a same participant.
se_flow |>
  as_tibble() |>
  ggplot(aes(x = as.integer(visit_code), y = log10(count), col = site)) +
  geom_line(aes(group = pid), alpha = 0.25) +
  geom_point(alpha = 0.5, size = 0.5) +
  facet_wrap(vars(cell_type_label), scales = "free_y", nrow = 2) +
  # guides(col = "none") +
  labs(x = "Visit code")

It does not look like there is any participant effect in the cell counts. The strong variability in total live cell counts appears to be explained by factors other than participants and visits.

Warning

There are lots of cell types with extremely low (sometimes 0) counts. Percentages should be interpreted with a lot of caution in these cases and not used as is for downstream analyses (using a Poisson or NB model will probably be necessary).

Code
# Samples to look at more closely: those with fewer than 10,000 "Live" events
# or with no "APC" event at all. All counts of these samples are shown in wide
# format (one column per feature), sorted by increasing "Live" count.
se_flow |>
  as_tibble() |>
  dplyr::filter(
    any(
      (.feature == "Live" & count < 10000) |
        (.feature == "APC" & count == 0)
    ),
    .by = .sample
  ) |>
  select(.feature, .sample, count) |>
  pivot_wider(names_from = .feature, values_from = count) |>
  arrange(Live) |>
  gt()
.sample Live Granulocytes B_cells APC CD40 CD80 CD86 CD4 CD8 DblePosCD4 DblePosCD8
068200407_2120 1188 115 23 14 2 0 1 137 75 49 10
068200358_2120 1420 429 4 3 1 0 2 2 4 0 0
068200407_1100 2359 44 1 3 0 3 3 0 0 0 0
068200465_1500 2988 133 14 46 27 10 34 72 72 10 8
068200282_1100 3071 77 1 1 0 0 0 0 0 0 0
068200347_2120 3245 119 0 2 1 0 0 0 0 0 0
068200179_1500 3290 1180 8 11 11 1 8 25 16 2 4
068200282_2120 3757 296 5 9 8 6 8 14 10 0 0
068100046_1000 3851 1868 31 45 28 18 42 46 23 1 1
068200357_2120 3943 216 0 0 0 0 0 0 0 0 0
068200363_2120 4258 133 1 0 0 0 0 0 0 0 0
068200441_2120 5032 237 46 23 16 6 20 44 62 5 10
068200373_2120 5187 1894 87 3 2 1 3 66 148 22 27
068100058_2120 6429 4707 24 5 1 1 5 29 15 1 3
068200398_1500 6712 4523 36 23 16 0 5 45 86 3 9
068200061_2120 7314 356 1 1 1 0 1 4 1 1 0
068200118_2120 7487 924 8 2 1 0 0 0 1 0 0
068200375_1500 7968 1501 7 2 1 0 0 20 9 0 1
068100036_1500 8449 3973 27 296 210 51 192 660 280 17 27
068200131_1000 8753 1595 2 57 57 7 41 23 32 0 2
068200028_1500 9499 352 18 8 6 0 7 4 6 0 1
068200252_1100 137986 2289 6 0 0 0 0 1 0 0 0
068100058_1100 326122 315574 506 0 0 0 0 568 182 0 0

Percentages

Code
# Heatmap of percentages (cell type x sample), split by site and machine.
# Samples are ordered along the x-axis by their "Granulocytes" percentage.
se_flow |>
  as_tibble() |>
  mutate(
    granulocyte_perc = percentage[cell_type_label == "Granulocytes"],
    .by = uid
  ) |>
  mutate(uid = fct_reorder(uid, granulocyte_perc)) |>
  ggplot(aes(x = uid, y = fct_rev(cell_type_label), fill = percentage)) +
  geom_tile() +
  facet_grid(
    rows = vars(parent_cell_type),
    cols = vars(site, machine),
    scales = "free",
    space = "free"
  ) +
  scale_fill_gradient(low = "white", high = "dodgerblue1", na.value = "red") +
  labs(
    x = "Samples (ordered by granulocytes percentage)",
    y = "Cell type",
    caption = "Red cells indicate NA values. Vertical facets show parent cell types."
  ) +
  theme(
    axis.text.x = element_blank(),
    strip.text.y = element_text(angle = 0)
  )

Code
# Heatmap of percentages (cell type x sample), split by site and visit.
# Samples are ordered along the x-axis by their "Granulocytes" percentage.
# The axis label and caption describe what is actually plotted: the ordering
# variable is the granulocyte percentage and red tiles are NA percentages.
se_flow |>
  as_tibble() |>
  group_by(uid) |>
  mutate(
    granulocyte_perc = percentage[cell_type_label == "Granulocytes"]
  ) |>
  ungroup() |>
  mutate(uid = uid |> fct_reorder(granulocyte_perc)) |>
  ggplot() +
  aes(x = uid, y = cell_type_label |> fct_rev(), fill = percentage) +
  facet_grid(
    parent_cell_type ~ site + visit_code,
    scales = "free", space = "free"
  ) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "dodgerblue1", na.value = "red") +
  theme(
    axis.text.x = element_blank(),
    strip.text.x = element_text(angle = 90),
    strip.text.y = element_text(angle = 0)
  ) +
  xlab("Samples (ordered by granulocytes percentage)") +
  ylab("Cell type") +
  labs(
    caption = "Red cells indicate NA values. Vertical facets show parent cell types."
  )

Code
# Percentages across visits, one panel per cell type; lines connect the
# samples of a same participant.
se_flow |>
  as_tibble() |>
  ggplot(aes(x = as.integer(visit_code), y = percentage, col = site)) +
  geom_line(aes(group = pid), alpha = 0.25) +
  geom_point(alpha = 0.5, size = 0.5) +
  facet_wrap(vars(cell_type_label), scales = "free_y", nrow = 2) +
  # guides(col = "none") +
  labs(x = "Visit code")

Granulocytes decreased at visits 1500 and 2120, and CD40 increased… We’ll see in the integrated analyses whether these changes are explained by colonization.

Code
# Percentage against event count (log10 x-axis), one panel per parent / cell
# type pair; rows with a missing percentage are left out.
se_flow |>
  as_tibble() |>
  dplyr::filter(!is.na(percentage)) |>
  ggplot(aes(x = count, y = percentage)) +
  geom_point(alpha = 0.3, size = 1) +
  scale_x_log10() +
  facet_wrap(
    vars(str_c("parent:\n", parent_cell_type), cell_type_label),
    scales = "free_y",
    nrow = 2
  )

As “feared”, we see more variability in the percentages where the counts are low (less than a hundred).